From 94cb2cc980f8946dfad5d5136880f855a972acfa Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Thu, 14 Sep 2023 10:04:08 +0200 Subject: [PATCH 01/15] WIP --- .github/actions/spelling/allow.txt | 3 +- .github/workflows/workload_checks.yml | 355 ++++++++++++++++++ workload-checks/README.md | 9 + .../cases/http_text_to_http_json/README.md | 5 + .../http_text_to_http_json/experiment.yaml | 21 ++ .../http_text_to_http_json/lading/lading.yaml | 16 + .../http_text_to_http_json/vector/vector.toml | 12 + workload-checks/typical/machine.yaml | 7 + 8 files changed, 427 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/workload_checks.yml create mode 100644 workload-checks/README.md create mode 100644 workload-checks/typical/cases/http_text_to_http_json/README.md create mode 100644 workload-checks/typical/cases/http_text_to_http_json/experiment.yaml create mode 100644 workload-checks/typical/cases/http_text_to_http_json/lading/lading.yaml create mode 100644 workload-checks/typical/cases/http_text_to_http_json/vector/vector.toml create mode 100644 workload-checks/typical/machine.yaml diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt index 360f67b173aed..19ed0a26e5543 100644 --- a/.github/actions/spelling/allow.txt +++ b/.github/actions/spelling/allow.txt @@ -45,9 +45,10 @@ Comcast Consolas Coolpad DEBHELPER -DOOV Danew +dkr Dockerfiles +DOOV Douban Enot Evercoss diff --git a/.github/workflows/workload_checks.yml b/.github/workflows/workload_checks.yml new file mode 100644 index 0000000000000..df2f5743b92d1 --- /dev/null +++ b/.github/workflows/workload_checks.yml @@ -0,0 +1,355 @@ +# Workload Checks +# +# Runs Vector Workload Checks. +# +# Runs on: +# - scheduled daily UTC midnight + +# This workflow runs the collection of our workload checks, using the repo HEAD SHA, +# which depends on when the workflow is invoked. +# +# The goal is to establish a baseline of check results for a variety of cases +# and visualize trends for important Vector use cases. +# +# The HEAD SHA is also used to tag the Vector Docker image. + +name: Workload Checks + +on: + workflow_call: + workflow_dispatch: + schedule: + - cron: '0 0 * * *' + pull_request: #TODO delete this before merging + types: + - opened + - synchronize +env: + SINGLE_MACHINE_PERFORMANCE_API: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_API }} + +jobs: + compute-metadata: + name: Compute metadata + runs-on: ubuntu-22.04 + needs: should-run + outputs: + target-sha: ${{ steps.pr-metadata-comment.outputs.TARGET_SHA }} + + # below are used in the experiment/analyze jobs + cpus: ${{ steps.system.outputs.CPUS }} + memory: ${{ steps.system.outputs.MEMORY }} + vector-cpus: ${{ steps.system.outputs.VECTOR_CPUS }} + + replicas: ${{ steps.experimental-meta.outputs.REPLICAS }} + warmup-seconds: ${{ steps.experimental-meta.outputs.WARMUP_SECONDS }} + total-samples: ${{ steps.experimental-meta.outputs.TOTAL_SAMPLES }} + p-value: ${{ steps.experimental-meta.outputs.P_VALUE }} + smp-version: ${{ steps.experimental-meta.outputs.SMP_CRATE_VERSION }} + lading-version: ${{ steps.experimental-meta.outputs.LADING_VERSION }} + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 1000 + + - name: Get git metadata + id: git-metadata + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + export TARGET_SHA=$(git merge-base master HEAD) + echo "TARGET_SHA=${TARGET_SHA}" >> $GITHUB_OUTPUT + + echo "target sha is: ${TARGET_SHA}" + + if [ "${TARGET_SHA}" = "" ] ; then + echo "TARGET_SHA not found, exiting." + exit 1 + fi + + - name: Setup experimental metadata + id: experimental-meta + run: | + export WARMUP_SECONDS="45" + export REPLICAS="10" + export TOTAL_SAMPLES="600" + export P_VALUE="0.1" + export SMP_CRATE_VERSION="0.10.0" + export LADING_VERSION="0.18.0" + + echo "warmup seconds: ${WARMUP_SECONDS}" + echo "replicas: ${REPLICAS}" + echo "total samples: ${TOTAL_SAMPLES}" + echo "regression p-value: ${P_VALUE}" + echo "smp crate version: ${SMP_CRATE_VERSION}" + echo "lading version: ${LADING_VERSION}" + + echo "WARMUP_SECONDS=${WARMUP_SECONDS}" >> $GITHUB_OUTPUT + echo "REPLICAS=${REPLICAS}" >> $GITHUB_OUTPUT + echo "TOTAL_SAMPLES=${TOTAL_SAMPLES}" >> $GITHUB_OUTPUT + echo "P_VALUE=${P_VALUE}" >> $GITHUB_OUTPUT + echo "SMP_CRATE_VERSION=${SMP_CRATE_VERSION}" >> $GITHUB_OUTPUT + echo "LADING_VERSION=${LADING_VERSION}" >> $GITHUB_OUTPUT + + - name: Setup system details + id: system + run: | + export CPUS="7" + export MEMORY="30g" + export VECTOR_CPUS="4" + + echo "cpus total: ${CPUS}" + echo "memory total: ${MEMORY}" + echo "vector cpus: ${VECTOR_CPUS}" + + echo "CPUS=${CPUS}" >> $GITHUB_OUTPUT + echo "MEMORY=${MEMORY}" >> $GITHUB_OUTPUT + echo "VECTOR_CPUS=${VECTOR_CPUS}" >> $GITHUB_OUTPUT + + ## + ## BUILD + ## + + build-target: + name: Build target Vector container + runs-on: [linux, ubuntu-20.04-4core] + needs: + - compute-metadata + steps: + - uses: colpal/actions-clean@v1 + + - uses: actions/checkout@v3 + with: + ref: ${{ needs.compute-metadata.outputs.target-sha }} + path: target-vector + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v3.0.0 + + - name: Build 'vector' target image + uses: docker/build-push-action@v5.0.0 + with: + context: target-vector/ + cache-from: type=gha + cache-to: type=gha,mode=max + file: regression/Dockerfile + builder: ${{ steps.buildx.outputs.name }} + outputs: type=docker,dest=${{ runner.temp }}/target-image.tar + tags: | + vector:${{ needs.compute-metadata.outputs.target-sha }} + + - name: Upload image as artifact + uses: actions/upload-artifact@v3 + with: + name: target-image + path: "${{ runner.temp }}/target-image.tar" + + confirm-valid-credentials: + name: Confirm AWS credentials are minimally valid + runs-on: ubuntu-22.04 + needs: + - compute-metadata + steps: + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4.0.0 + with: + aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Download SMP binary + run: | + aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp + + ## + ## SUBMIT + ## + + upload-target-image-to-ecr: + name: Upload target images to ECR + runs-on: ubuntu-22.04 + needs: + - compute-metadata + - confirm-valid-credentials + - build-target + steps: + - name: 'Download target image' + uses: actions/download-artifact@v3 + with: + name: target-image + + - name: Load target image + run: | + docker load --input target-image.tar + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4.0.0 + with: + aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + + - name: Docker Login to ECR + uses: docker/login-action@v2 + with: + registry: ${{ steps.login-ecr.outputs.registry }} + + - name: Tag & push target image + run: | + docker tag vector:${{ needs.compute-metadata.outputs.target-sha }} ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} + docker push ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} + + submit-job: + name: Submit workload checks job + runs-on: ubuntu-22.04 + needs: + - compute-metadata + - upload-target-image-to-ecr + steps: + - name: Check status, in-progress + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \ + -f state='pending' \ + -f description='Experiments submitted to the Regression Detection cluster.' \ + -f context='Regression Detection Suite / submission' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - uses: actions/checkout@v3 + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4.0.0 + with: + aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + + - name: Download SMP binary + run: | + aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp + + - name: Submit job + env: + RUST_LOG: info + run: | + git fetch origin + + # Setup AWS credentials for single-machine-performance AWS account + AWS_NAMED_PROFILE="single-machine-performance" + SMP_ACCOUNT_ID=$(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-account-id --with-decryption --query "Parameter.Value" --out text) + SMP_ECR_URL=${SMP_ACCOUNT_ID}.dkr.ecr.us-west-2.amazonaws.com + SMP_AGENT_TEAM_ID=$(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-agent-team-id --with-decryption --query "Parameter.Value" --out text) + SMP_API=$(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-api --with-decryption --query "Parameter.Value" --out text) + aws configure set aws_access_key_id $(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-bot-access-key-id --with-decryption --query "Parameter.Value" --out text) --profile ${AWS_NAMED_PROFILE} + aws configure set aws_secret_access_key $(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-bot-access-key --with-decryption --query "Parameter.Value" --out text) --profile ${AWS_NAMED_PROFILE} + aws configure set region us-west-2 --profile ${AWS_NAMED_PROFILE} + + # Download smp binary and prepare it for use + aws --profile single-machine-performance s3 cp s3://smp-cli-releases/v${SMP_VERSION}/x86_64-unknown-linux-gnu/smp smp + chmod +x smp + + TARGET_IMAGE =${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} + CURRENT_DATE=$(date --utc '+%Y_%m_%d') + + RUST_LOG="info,aws_config::profile::credentials=error" + RUST_LOG_DEBUG="debug,aws_config::profile::credentials=error" + + chmod +x ${{ runner.temp }}/bin/smp + + RUST_BACKTRACE=1 RUST_LOG="${RUST_LOG_DEBUG}" ${{ runner.temp }}/bin/smp \ + --team-id ${SMP_AGENT_TEAM_ID} --api-base ${SMP_API} --aws-named-profile ${AWS_NAMED_PROFILE} \ + job submit-workload \ + --lading-version ${LADING_VERSION} \ + --total-samples ${TOTAL_SAMPLES} \ + --warmup-seconds ${WARMUP_SECONDS} \ + --replicas ${REPLICAS} \ + --target-image ${TARGET_IMAGE} \ + --target-sha ${TARGET_SHA} \ + --target-config-dir ${{ github.workspace }}/workload-checks \ + --target-name vector \ + --target-command "/usr/local/bin/vector" \ + --target-environment-variables "DD_HOSTNAME=smp-workload-checks,DD_DD_URL=http://127.0.0.1:9092,DD_API_KEY=00000001" \ + --tags smp_status=nightly,client_team="agent",tag_date="${CURRENT_DATE}" \ + --submission-metadata ${{ runner.temp }}/submission-metadata + + - uses: actions/upload-artifact@v3 + with: + name: vector-submission-metadata + path: ${{ runner.temp }}/submission-metadata + + - name: Await job + timeout-minutes: 120 + env: + RUST_LOG: info + run: | + chmod +x ${{ runner.temp }}/bin/smp + + ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} \ + job status \ + --wait \ + --wait-delay-seconds 60 \ + --wait-timeout-minutes 90 \ + --submission-metadata ${{ runner.temp }}/submission-metadata + + - name: Handle cancellation if necessary + if: ${{ cancelled() }} + env: + RUST_LOG: info + run: | + chmod +x ${{ runner.temp }}/bin/smp + ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job cancel \ + --submission-metadata ${{ runner.temp }}/submission-metadata + + - name: Check status, cancelled + if: ${{ cancelled() }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \ + -f state='failure' \ + -f description='Experiments submitted to the Regression Detection cluster cancelled.' \ + -f context='Regression Detection Suite / submission' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - name: Check status, success + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \ + -f state='success' \ + -f description='Experiments submitted to the Regression Detection cluster successfully.' \ + -f context='Regression Detection Suite / submission' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - name: Check status, failure + if: ${{ failure() }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \ + -f state='success' \ + -f description='Experiments submitted to the Regression Detection Suite failed.' \ + -f context='Regression Detection Suite / submission' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} diff --git a/workload-checks/README.md b/workload-checks/README.md new file mode 100644 index 0000000000000..eee2c80bec72d --- /dev/null +++ b/workload-checks/README.md @@ -0,0 +1,9 @@ +# Workload Checks + +The `smp` tool performs a nightly run of 'checks' to determine if Vector is fit for purpose. +The 'checks' can help us answer questions about CPU usage, memory consumption, throughput etc. +By consistently running these checks we establish a historical dataset [here](https://app.datadoghq.com/dashboard/wj9-9ds-q49?refresh_mode=sliding&from_ts=1694089061369&to_ts=1694693861369&live=true). + +## Adding an Experiment + +You can read more about the workload requirements [here](https://github.com/DataDog/datadog-agent/blob/main/test/workload-checks/README.md). diff --git a/workload-checks/typical/cases/http_text_to_http_json/README.md b/workload-checks/typical/cases/http_text_to_http_json/README.md new file mode 100644 index 0000000000000..ec257135c5cf6 --- /dev/null +++ b/workload-checks/typical/cases/http_text_to_http_json/README.md @@ -0,0 +1,5 @@ +# HTTP Text To HTTP JSON + +## Purpose + +Simulates a simple Vector use with one HTTP server source and one HTTP sink. This was added as a proof of concept for the SMP workload checks. diff --git a/workload-checks/typical/cases/http_text_to_http_json/experiment.yaml b/workload-checks/typical/cases/http_text_to_http_json/experiment.yaml new file mode 100644 index 0000000000000..d640b98dfc1c5 --- /dev/null +++ b/workload-checks/typical/cases/http_text_to_http_json/experiment.yaml @@ -0,0 +1,21 @@ +description: > + Simulates a simple Vector use with one HTTP server source and one HTTP sink. + This was added as a proof of concept for the SMP workload checks. +teams: [] + +labels: {} + +checks: + - name: memory_usage + description: "Memory usage" + bounds: + series: rss_bytes + # The machine has 12Gb free. + upper_bound: 3.5Gb + + - name: cpu_utilization + description: "CPU utilization" + bounds: + series: cpu_percentage + # The machine has 8 cores available. + upper_bound: 400 diff --git a/workload-checks/typical/cases/http_text_to_http_json/lading/lading.yaml b/workload-checks/typical/cases/http_text_to_http_json/lading/lading.yaml new file mode 100644 index 0000000000000..24b27c1e26abb --- /dev/null +++ b/workload-checks/typical/cases/http_text_to_http_json/lading/lading.yaml @@ -0,0 +1,16 @@ +generator: + - http: + seed: [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, + 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131] + headers: {} + target_uri: "http://localhost:8282/" + bytes_per_second: "500 Mb" + parallel_connections: 10 + method: + post: + maximum_prebuild_cache_size_bytes: "256 Mb" + variant: "apache_common" + +blackhole: + - http: + binding_addr: "0.0.0.0:8080" diff --git a/workload-checks/typical/cases/http_text_to_http_json/vector/vector.toml b/workload-checks/typical/cases/http_text_to_http_json/vector/vector.toml new file mode 100644 index 0000000000000..0451005596036 --- /dev/null +++ b/workload-checks/typical/cases/http_text_to_http_json/vector/vector.toml @@ -0,0 +1,12 @@ +data_dir = "/var/lib/vector" + +[sources.logs] +type = "http_server" +address = "0.0.0.0:8282" +decoding.codec = "bytes" + +[sinks.http_sink] +type = "http" +uri = "http://localhost:8080" +inputs = ["logs"] +encoding.codec = "json" diff --git a/workload-checks/typical/machine.yaml b/workload-checks/typical/machine.yaml new file mode 100644 index 0000000000000..f16dab521f2fb --- /dev/null +++ b/workload-checks/typical/machine.yaml @@ -0,0 +1,7 @@ +description: > + An ‘average’ customer server on which the agent runs alongside user + software. This is equivalent to an AWS c5.2xlarge with 4Gb of system memory + held back for system processes. +name: typical +cpu: 8 +memory: 12Gb From a1012737464a41a10402a1f487f1aad8224ecbb4 Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Mon, 18 Sep 2023 11:16:51 -0400 Subject: [PATCH 02/15] workflow name --- .github/workflows/workload_checks.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/workload_checks.yml b/.github/workflows/workload_checks.yml index df2f5743b92d1..fd670631c5cf5 100644 --- a/.github/workflows/workload_checks.yml +++ b/.github/workflows/workload_checks.yml @@ -1,11 +1,11 @@ -# Workload Checks +# Workload Checks Suite # # Runs Vector Workload Checks. # # Runs on: # - scheduled daily UTC midnight -# This workflow runs the collection of our workload checks, using the repo HEAD SHA, +# This workflow runs the collection of our workload checks, using the Vector repo HEAD SHA, # which depends on when the workflow is invoked. # # The goal is to establish a baseline of check results for a variety of cases @@ -13,7 +13,7 @@ # # The HEAD SHA is also used to tag the Vector Docker image. -name: Workload Checks +name: Workload Checks Suite on: workflow_call: From cf9ece68a9fe682daff11051a5eac6fd74c2d8c5 Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Mon, 18 Sep 2023 11:23:46 -0400 Subject: [PATCH 03/15] more fixes to job deps --- .github/workflows/workload_checks.yml | 73 +++++++-------------------- 1 file changed, 18 insertions(+), 55 deletions(-) diff --git a/.github/workflows/workload_checks.yml b/.github/workflows/workload_checks.yml index fd670631c5cf5..19dc0690d24ee 100644 --- a/.github/workflows/workload_checks.yml +++ b/.github/workflows/workload_checks.yml @@ -31,22 +31,15 @@ jobs: compute-metadata: name: Compute metadata runs-on: ubuntu-22.04 - needs: should-run outputs: - target-sha: ${{ steps.pr-metadata-comment.outputs.TARGET_SHA }} - - # below are used in the experiment/analyze jobs - cpus: ${{ steps.system.outputs.CPUS }} - memory: ${{ steps.system.outputs.MEMORY }} - vector-cpus: ${{ steps.system.outputs.VECTOR_CPUS }} - replicas: ${{ steps.experimental-meta.outputs.REPLICAS }} warmup-seconds: ${{ steps.experimental-meta.outputs.WARMUP_SECONDS }} total-samples: ${{ steps.experimental-meta.outputs.TOTAL_SAMPLES }} - p-value: ${{ steps.experimental-meta.outputs.P_VALUE }} smp-version: ${{ steps.experimental-meta.outputs.SMP_CRATE_VERSION }} lading-version: ${{ steps.experimental-meta.outputs.LADING_VERSION }} + target-sha: ${{ steps.git-metadata.outputs.TARGET_SHA }} + steps: - uses: actions/checkout@v3 with: @@ -57,7 +50,7 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - export TARGET_SHA=$(git merge-base master HEAD) + export TARGET_SHA=$(git rev-parse HEAD) echo "TARGET_SHA=${TARGET_SHA}" >> $GITHUB_OUTPUT echo "target sha is: ${TARGET_SHA}" @@ -73,39 +66,21 @@ jobs: export WARMUP_SECONDS="45" export REPLICAS="10" export TOTAL_SAMPLES="600" - export P_VALUE="0.1" export SMP_CRATE_VERSION="0.10.0" export LADING_VERSION="0.18.0" echo "warmup seconds: ${WARMUP_SECONDS}" echo "replicas: ${REPLICAS}" echo "total samples: ${TOTAL_SAMPLES}" - echo "regression p-value: ${P_VALUE}" echo "smp crate version: ${SMP_CRATE_VERSION}" echo "lading version: ${LADING_VERSION}" echo "WARMUP_SECONDS=${WARMUP_SECONDS}" >> $GITHUB_OUTPUT echo "REPLICAS=${REPLICAS}" >> $GITHUB_OUTPUT echo "TOTAL_SAMPLES=${TOTAL_SAMPLES}" >> $GITHUB_OUTPUT - echo "P_VALUE=${P_VALUE}" >> $GITHUB_OUTPUT echo "SMP_CRATE_VERSION=${SMP_CRATE_VERSION}" >> $GITHUB_OUTPUT echo "LADING_VERSION=${LADING_VERSION}" >> $GITHUB_OUTPUT - - name: Setup system details - id: system - run: | - export CPUS="7" - export MEMORY="30g" - export VECTOR_CPUS="4" - - echo "cpus total: ${CPUS}" - echo "memory total: ${MEMORY}" - echo "vector cpus: ${VECTOR_CPUS}" - - echo "CPUS=${CPUS}" >> $GITHUB_OUTPUT - echo "MEMORY=${MEMORY}" >> $GITHUB_OUTPUT - echo "VECTOR_CPUS=${VECTOR_CPUS}" >> $GITHUB_OUTPUT - ## ## BUILD ## @@ -118,6 +93,8 @@ jobs: steps: - uses: colpal/actions-clean@v1 + - uses: actions/checkout@v3 + - uses: actions/checkout@v3 with: ref: ${{ needs.compute-metadata.outputs.target-sha }} @@ -162,10 +139,6 @@ jobs: run: | aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp - ## - ## SUBMIT - ## - upload-target-image-to-ecr: name: Upload target images to ECR runs-on: ubuntu-22.04 @@ -204,6 +177,10 @@ jobs: docker tag vector:${{ needs.compute-metadata.outputs.target-sha }} ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} docker push ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} + ## + ## SUBMIT + ## + submit-job: name: Submit workload checks job runs-on: ubuntu-22.04 @@ -220,8 +197,8 @@ jobs: -H "Accept: application/vnd.github+json" \ /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \ -f state='pending' \ - -f description='Experiments submitted to the Regression Detection cluster.' \ - -f context='Regression Detection Suite / submission' \ + -f description='Experiments submitted to the Workload Checks cluster.' \ + -f context='Workload Checks Suite / submission' \ -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - uses: actions/checkout@v3 @@ -245,21 +222,7 @@ jobs: env: RUST_LOG: info run: | - git fetch origin - - # Setup AWS credentials for single-machine-performance AWS account - AWS_NAMED_PROFILE="single-machine-performance" - SMP_ACCOUNT_ID=$(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-account-id --with-decryption --query "Parameter.Value" --out text) - SMP_ECR_URL=${SMP_ACCOUNT_ID}.dkr.ecr.us-west-2.amazonaws.com - SMP_AGENT_TEAM_ID=$(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-agent-team-id --with-decryption --query "Parameter.Value" --out text) - SMP_API=$(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-api --with-decryption --query "Parameter.Value" --out text) - aws configure set aws_access_key_id $(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-bot-access-key-id --with-decryption --query "Parameter.Value" --out text) --profile ${AWS_NAMED_PROFILE} - aws configure set aws_secret_access_key $(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-bot-access-key --with-decryption --query "Parameter.Value" --out text) --profile ${AWS_NAMED_PROFILE} - aws configure set region us-west-2 --profile ${AWS_NAMED_PROFILE} - - # Download smp binary and prepare it for use - aws --profile single-machine-performance s3 cp s3://smp-cli-releases/v${SMP_VERSION}/x86_64-unknown-linux-gnu/smp smp - chmod +x smp + chmod +x ${{ runner.temp }}/bin/smp TARGET_IMAGE =${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} CURRENT_DATE=$(date --utc '+%Y_%m_%d') @@ -323,8 +286,8 @@ jobs: -H "Accept: application/vnd.github+json" \ /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \ -f state='failure' \ - -f description='Experiments submitted to the Regression Detection cluster cancelled.' \ - -f context='Regression Detection Suite / submission' \ + -f description='Experiments submitted to the Workload Checks cluster cancelled.' \ + -f context='Workload Checks Suite / submission' \ -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - name: Check status, success @@ -336,8 +299,8 @@ jobs: -H "Accept: application/vnd.github+json" \ /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \ -f state='success' \ - -f description='Experiments submitted to the Regression Detection cluster successfully.' \ - -f context='Regression Detection Suite / submission' \ + -f description='Experiments submitted to the Workload Checks cluster successfully.' \ + -f context='Workload Checks Suite / submission' \ -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - name: Check status, failure @@ -350,6 +313,6 @@ jobs: -H "Accept: application/vnd.github+json" \ /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \ -f state='success' \ - -f description='Experiments submitted to the Regression Detection Suite failed.' \ - -f context='Regression Detection Suite / submission' \ + -f description='Experiments submitted to the Workload Checks Suite failed.' \ + -f context='Workload Checks Suite / submission' \ -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} From ce80946bdf3a8669c3af1f9f628bded09763441d Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Mon, 18 Sep 2023 16:41:58 -0400 Subject: [PATCH 04/15] try with nightly image --- .github/workflows/workload_checks.yml | 185 +++++++++++++------------- 1 file changed, 93 insertions(+), 92 deletions(-) diff --git a/.github/workflows/workload_checks.yml b/.github/workflows/workload_checks.yml index 19dc0690d24ee..c333a6014f6e8 100644 --- a/.github/workflows/workload_checks.yml +++ b/.github/workflows/workload_checks.yml @@ -85,97 +85,97 @@ jobs: ## BUILD ## - build-target: - name: Build target Vector container - runs-on: [linux, ubuntu-20.04-4core] - needs: - - compute-metadata - steps: - - uses: colpal/actions-clean@v1 - - - uses: actions/checkout@v3 - - - uses: actions/checkout@v3 - with: - ref: ${{ needs.compute-metadata.outputs.target-sha }} - path: target-vector - - - name: Set up Docker Buildx - id: buildx - uses: docker/setup-buildx-action@v3.0.0 - - - name: Build 'vector' target image - uses: docker/build-push-action@v5.0.0 - with: - context: target-vector/ - cache-from: type=gha - cache-to: type=gha,mode=max - file: regression/Dockerfile - builder: ${{ steps.buildx.outputs.name }} - outputs: type=docker,dest=${{ runner.temp }}/target-image.tar - tags: | - vector:${{ needs.compute-metadata.outputs.target-sha }} - - - name: Upload image as artifact - uses: actions/upload-artifact@v3 - with: - name: target-image - path: "${{ runner.temp }}/target-image.tar" - - confirm-valid-credentials: - name: Confirm AWS credentials are minimally valid - runs-on: ubuntu-22.04 - needs: - - compute-metadata - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4.0.0 - with: - aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - - name: Download SMP binary - run: | - aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp - - upload-target-image-to-ecr: - name: Upload target images to ECR - runs-on: ubuntu-22.04 - needs: - - compute-metadata - - confirm-valid-credentials - - build-target - steps: - - name: 'Download target image' - uses: actions/download-artifact@v3 - with: - name: target-image - - - name: Load target image - run: | - docker load --input target-image.tar - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4.0.0 - with: - aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - - name: Docker Login to ECR - uses: docker/login-action@v2 - with: - registry: ${{ steps.login-ecr.outputs.registry }} - - - name: Tag & push target image - run: | - docker tag vector:${{ needs.compute-metadata.outputs.target-sha }} ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} - docker push ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} +# build-target: +# name: Build target Vector container +# runs-on: [linux, ubuntu-20.04-4core] +# needs: +# - compute-metadata +# steps: +# - uses: colpal/actions-clean@v1 +# +# - uses: actions/checkout@v3 +# +# - uses: actions/checkout@v3 +# with: +# ref: ${{ needs.compute-metadata.outputs.target-sha }} +# path: target-vector +# +# - name: Set up Docker Buildx +# id: buildx +# uses: docker/setup-buildx-action@v3.0.0 +# +# - name: Build 'vector' target image +# uses: docker/build-push-action@v5.0.0 +# with: +# context: target-vector/ +# cache-from: type=gha +# cache-to: type=gha,mode=max +# file: regression/Dockerfile +# builder: ${{ steps.buildx.outputs.name }} +# outputs: type=docker,dest=${{ runner.temp }}/target-image.tar +# tags: | +# vector:${{ needs.compute-metadata.outputs.target-sha }} +# +# - name: Upload image as artifact +# uses: actions/upload-artifact@v3 +# with: +# name: target-image +# path: "${{ runner.temp }}/target-image.tar" +# +# confirm-valid-credentials: +# name: Confirm AWS credentials are minimally valid +# runs-on: ubuntu-22.04 +# needs: +# - compute-metadata +# steps: +# - name: Configure AWS Credentials +# uses: aws-actions/configure-aws-credentials@v4.0.0 +# with: +# aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} +# aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} +# aws-region: us-west-2 +# +# - name: Download SMP binary +# run: | +# aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp +# +# upload-target-image-to-ecr: +# name: Upload target images to ECR +# runs-on: ubuntu-22.04 +# needs: +# - compute-metadata +# - confirm-valid-credentials +# - build-target +# steps: +# - name: 'Download target image' +# uses: actions/download-artifact@v3 +# with: +# name: target-image +# +# - name: Load target image +# run: | +# docker load --input target-image.tar +# +# - name: Configure AWS Credentials +# uses: aws-actions/configure-aws-credentials@v4.0.0 +# with: +# aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} +# aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} +# aws-region: us-west-2 +# +# - name: Login to Amazon ECR +# id: login-ecr +# uses: aws-actions/amazon-ecr-login@v1 +# +# - name: Docker Login to ECR +# uses: docker/login-action@v2 +# with: +# registry: ${{ steps.login-ecr.outputs.registry }} +# +# - name: Tag & push target image +# run: | +# docker tag vector:${{ needs.compute-metadata.outputs.target-sha }} ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} +# docker push ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} ## ## SUBMIT @@ -224,7 +224,8 @@ jobs: run: | chmod +x ${{ runner.temp }}/bin/smp - TARGET_IMAGE =${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} +# TARGET_IMAGE =${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} + TARGET_IMAGE=timberio/vector:nightly-debian CURRENT_DATE=$(date --utc '+%Y_%m_%d') RUST_LOG="info,aws_config::profile::credentials=error" From 572226503cb27ee2f707fdab103c1e8bc3e92d1e Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Mon, 18 Sep 2023 16:47:30 -0400 Subject: [PATCH 05/15] Update workload_checks.yml --- .github/workflows/workload_checks.yml | 150 ++++---------------------- 1 file changed, 20 insertions(+), 130 deletions(-) diff --git a/.github/workflows/workload_checks.yml b/.github/workflows/workload_checks.yml index c333a6014f6e8..c72f2f94e0c32 100644 --- a/.github/workflows/workload_checks.yml +++ b/.github/workflows/workload_checks.yml @@ -5,13 +5,11 @@ # Runs on: # - scheduled daily UTC midnight -# This workflow runs the collection of our workload checks, using the Vector repo HEAD SHA, +# This workflow runs the collection of our workload checks, using the latest Vector nightly image, # which depends on when the workflow is invoked. # # The goal is to establish a baseline of check results for a variety of cases # and visualize trends for important Vector use cases. -# -# The HEAD SHA is also used to tag the Vector Docker image. name: Workload Checks Suite @@ -81,112 +79,11 @@ jobs: echo "SMP_CRATE_VERSION=${SMP_CRATE_VERSION}" >> $GITHUB_OUTPUT echo "LADING_VERSION=${LADING_VERSION}" >> $GITHUB_OUTPUT - ## - ## BUILD - ## - -# build-target: -# name: Build target Vector container -# runs-on: [linux, ubuntu-20.04-4core] -# needs: -# - compute-metadata -# steps: -# - uses: colpal/actions-clean@v1 -# -# - uses: actions/checkout@v3 -# -# - uses: actions/checkout@v3 -# with: -# ref: ${{ needs.compute-metadata.outputs.target-sha }} -# path: target-vector -# -# - name: Set up Docker Buildx -# id: buildx -# uses: docker/setup-buildx-action@v3.0.0 -# -# - name: Build 'vector' target image -# uses: docker/build-push-action@v5.0.0 -# with: -# context: target-vector/ -# cache-from: type=gha -# cache-to: type=gha,mode=max -# file: regression/Dockerfile -# builder: ${{ steps.buildx.outputs.name }} -# outputs: type=docker,dest=${{ runner.temp }}/target-image.tar -# tags: | -# vector:${{ needs.compute-metadata.outputs.target-sha }} -# -# - name: Upload image as artifact -# uses: actions/upload-artifact@v3 -# with: -# name: target-image -# path: "${{ runner.temp }}/target-image.tar" -# -# confirm-valid-credentials: -# name: Confirm AWS credentials are minimally valid -# runs-on: ubuntu-22.04 -# needs: -# - compute-metadata -# steps: -# - name: Configure AWS Credentials -# uses: aws-actions/configure-aws-credentials@v4.0.0 -# with: -# aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} -# aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} -# aws-region: us-west-2 -# -# - name: Download SMP binary -# run: | -# aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp -# -# upload-target-image-to-ecr: -# name: Upload target images to ECR -# runs-on: ubuntu-22.04 -# needs: -# - compute-metadata -# - confirm-valid-credentials -# - build-target -# steps: -# - name: 'Download target image' -# uses: actions/download-artifact@v3 -# with: -# name: target-image -# -# - name: Load target image -# run: | -# docker load --input target-image.tar -# -# - name: Configure AWS Credentials -# uses: aws-actions/configure-aws-credentials@v4.0.0 -# with: -# aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} -# aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} -# aws-region: us-west-2 -# -# - name: Login to Amazon ECR -# id: login-ecr -# uses: aws-actions/amazon-ecr-login@v1 -# -# - name: Docker Login to ECR -# uses: docker/login-action@v2 -# with: -# registry: ${{ steps.login-ecr.outputs.registry }} -# -# - name: Tag & push target image -# run: | -# docker tag vector:${{ needs.compute-metadata.outputs.target-sha }} ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} -# docker push ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} - - ## - ## SUBMIT - ## - submit-job: name: Submit workload checks job runs-on: ubuntu-22.04 needs: - compute-metadata - - upload-target-image-to-ecr steps: - name: Check status, in-progress env: @@ -222,32 +119,25 @@ jobs: env: RUST_LOG: info run: | - chmod +x ${{ runner.temp }}/bin/smp - -# TARGET_IMAGE =${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }} - TARGET_IMAGE=timberio/vector:nightly-debian - CURRENT_DATE=$(date --utc '+%Y_%m_%d') - - RUST_LOG="info,aws_config::profile::credentials=error" - RUST_LOG_DEBUG="debug,aws_config::profile::credentials=error" - - chmod +x ${{ runner.temp }}/bin/smp - - RUST_BACKTRACE=1 RUST_LOG="${RUST_LOG_DEBUG}" ${{ runner.temp }}/bin/smp \ - --team-id ${SMP_AGENT_TEAM_ID} --api-base ${SMP_API} --aws-named-profile ${AWS_NAMED_PROFILE} \ - job submit-workload \ - --lading-version ${LADING_VERSION} \ - --total-samples ${TOTAL_SAMPLES} \ - --warmup-seconds ${WARMUP_SECONDS} \ - --replicas ${REPLICAS} \ - --target-image ${TARGET_IMAGE} \ - --target-sha ${TARGET_SHA} \ - --target-config-dir ${{ github.workspace }}/workload-checks \ - --target-name vector \ - --target-command "/usr/local/bin/vector" \ - --target-environment-variables "DD_HOSTNAME=smp-workload-checks,DD_DD_URL=http://127.0.0.1:9092,DD_API_KEY=00000001" \ - --tags smp_status=nightly,client_team="agent",tag_date="${CURRENT_DATE}" \ - --submission-metadata ${{ runner.temp }}/submission-metadata + CURRENT_DATE=$(date --utc '+%Y_%m_%d') + RUST_LOG_DEBUG="debug,aws_config::profile::credentials=error" + + chmod +x ${{ runner.temp }}/bin/smp + RUST_BACKTRACE=1 RUST_LOG="${RUST_LOG_DEBUG}" ${{ runner.temp }}/bin/smp \ + --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} \ + job submit-workload \ + --lading-version ${{ needs.compute-metadata.outputs.lading-version }} \ + --total-samples ${{ needs.compute-metadata.outputs.total-samples }} \ + --warmup-seconds ${{ needs.compute-metadata.outputs.warmup-seconds }} \ + --replicas ${{ needs.compute-metadata.outputs.replicas }} \ + --target-image timberio/vector:nightly-debian \ + --target-sha ${{ needs.compute-metadata.outputs.target-sha }} \ + --target-config-dir ${{ github.workspace }}/workload-checks \ + --target-name vector \ + --target-command "/usr/bin/vector" \ + --target-environment-variables "DD_HOSTNAME=smp-workload-checks,DD_DD_URL=http://127.0.0.1:9092,DD_API_KEY=00000001" \ + --tags smp_status=nightly,client_team="agent",tag_date="${CURRENT_DATE}" \ + --submission-metadata ${{ runner.temp }}/submission-metadata - uses: actions/upload-artifact@v3 with: From 68a00bff9a377708b275f2114e51ebe860b99e32 Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Wed, 20 Sep 2023 14:15:08 -0400 Subject: [PATCH 06/15] fix client_team tag --- .github/workflows/workload_checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workload_checks.yml b/.github/workflows/workload_checks.yml index c72f2f94e0c32..998d6364e9276 100644 --- a/.github/workflows/workload_checks.yml +++ b/.github/workflows/workload_checks.yml @@ -136,7 +136,7 @@ jobs: --target-name vector \ --target-command "/usr/bin/vector" \ --target-environment-variables "DD_HOSTNAME=smp-workload-checks,DD_DD_URL=http://127.0.0.1:9092,DD_API_KEY=00000001" \ - --tags smp_status=nightly,client_team="agent",tag_date="${CURRENT_DATE}" \ + --tags smp_status=nightly,client_team="vector",tag_date="${CURRENT_DATE}" \ --submission-metadata ${{ runner.temp }}/submission-metadata - uses: actions/upload-artifact@v3 From 101fd2ee98e7857a5f15699200a2439d18b3bc5c Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Fri, 22 Sep 2023 10:08:48 -0400 Subject: [PATCH 07/15] trigger another run with new bounds --- .../typical/cases/http_text_to_http_json/experiment.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workload-checks/typical/cases/http_text_to_http_json/experiment.yaml b/workload-checks/typical/cases/http_text_to_http_json/experiment.yaml index d640b98dfc1c5..3e3c2ba73b7c7 100644 --- a/workload-checks/typical/cases/http_text_to_http_json/experiment.yaml +++ b/workload-checks/typical/cases/http_text_to_http_json/experiment.yaml @@ -18,4 +18,5 @@ checks: bounds: series: cpu_percentage # The machine has 8 cores available. - upper_bound: 400 + lower_bound: 400 + upper_bound: 800 From 2525c4016f3229c05d0c0dfbbb0706d0207e6cf7 Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Tue, 26 Sep 2023 16:32:14 -0400 Subject: [PATCH 08/15] Update .github/workflows/workload_checks.yml Co-authored-by: neuronull --- .github/workflows/workload_checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workload_checks.yml b/.github/workflows/workload_checks.yml index 998d6364e9276..9ed446c872ce1 100644 --- a/.github/workflows/workload_checks.yml +++ b/.github/workflows/workload_checks.yml @@ -3,7 +3,7 @@ # Runs Vector Workload Checks. # # Runs on: -# - scheduled daily UTC midnight +# - scheduled daily 04:00 UTC # This workflow runs the collection of our workload checks, using the latest Vector nightly image, # which depends on when the workflow is invoked. From 9e8307ac22c59b522996a88e1f0d8ee03b951329 Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Tue, 26 Sep 2023 16:35:08 -0400 Subject: [PATCH 09/15] Update .github/workflows/workload_checks.yml Co-authored-by: neuronull --- .github/workflows/workload_checks.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/workload_checks.yml b/.github/workflows/workload_checks.yml index 9ed446c872ce1..e8d10ca624bc3 100644 --- a/.github/workflows/workload_checks.yml +++ b/.github/workflows/workload_checks.yml @@ -40,8 +40,6 @@ jobs: steps: - uses: actions/checkout@v3 - with: - fetch-depth: 1000 - name: Get git metadata id: git-metadata From dd6ac9383a6ac9a5f161691a9e08d908c8cf475a Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Tue, 26 Sep 2023 16:35:35 -0400 Subject: [PATCH 10/15] Update workload-checks/typical/machine.yaml Co-authored-by: neuronull --- workload-checks/typical/machine.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workload-checks/typical/machine.yaml b/workload-checks/typical/machine.yaml index f16dab521f2fb..ad34e1a080142 100644 --- a/workload-checks/typical/machine.yaml +++ b/workload-checks/typical/machine.yaml @@ -1,5 +1,5 @@ description: > - An ‘average’ customer server on which the agent runs alongside user + An ‘average’ customer server on which vector runs alongside user software. This is equivalent to an AWS c5.2xlarge with 4Gb of system memory held back for system processes. name: typical From c0762c0bc2476ac8152aee1e0ce71125ee78514a Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Tue, 26 Sep 2023 16:38:56 -0400 Subject: [PATCH 11/15] address Kyle's review points --- .github/workflows/workload_checks.yml | 4 ++-- workload-checks/README.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/workload_checks.yml b/.github/workflows/workload_checks.yml index e8d10ca624bc3..18bfe58bbc655 100644 --- a/.github/workflows/workload_checks.yml +++ b/.github/workflows/workload_checks.yml @@ -28,7 +28,7 @@ env: jobs: compute-metadata: name: Compute metadata - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest outputs: replicas: ${{ steps.experimental-meta.outputs.REPLICAS }} warmup-seconds: ${{ steps.experimental-meta.outputs.WARMUP_SECONDS }} @@ -79,7 +79,7 @@ jobs: submit-job: name: Submit workload checks job - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest needs: - compute-metadata steps: diff --git a/workload-checks/README.md b/workload-checks/README.md index eee2c80bec72d..20b783fe0c6e3 100644 --- a/workload-checks/README.md +++ b/workload-checks/README.md @@ -2,7 +2,7 @@ The `smp` tool performs a nightly run of 'checks' to determine if Vector is fit for purpose. The 'checks' can help us answer questions about CPU usage, memory consumption, throughput etc. -By consistently running these checks we establish a historical dataset [here](https://app.datadoghq.com/dashboard/wj9-9ds-q49?refresh_mode=sliding&from_ts=1694089061369&to_ts=1694693861369&live=true). +By consistently running these checks we establish a historical baseline that we can compare against. ## Adding an Experiment From 2a0ad9d46be148afd4f00edd3e42b15dd3a093d9 Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Tue, 26 Sep 2023 16:39:38 -0400 Subject: [PATCH 12/15] Update .github/workflows/workload_checks.yml Co-authored-by: neuronull --- .github/workflows/workload_checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workload_checks.yml b/.github/workflows/workload_checks.yml index 18bfe58bbc655..6b749d978aa55 100644 --- a/.github/workflows/workload_checks.yml +++ b/.github/workflows/workload_checks.yml @@ -17,7 +17,7 @@ on: workflow_call: workflow_dispatch: schedule: - - cron: '0 0 * * *' + - cron: '0 4 * * *' pull_request: #TODO delete this before merging types: - opened From 005cc54b79bdfbb06e42916fddda92f98d960c22 Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Tue, 26 Sep 2023 17:05:25 -0400 Subject: [PATCH 13/15] address Brian's review points --- workload-checks/README.md | 2 +- .../typical/cases/http_text_to_http_json/experiment.yaml | 6 +++--- .../typical/cases/http_text_to_http_json/lading/lading.yaml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/workload-checks/README.md b/workload-checks/README.md index 20b783fe0c6e3..70f551da8f29b 100644 --- a/workload-checks/README.md +++ b/workload-checks/README.md @@ -6,4 +6,4 @@ By consistently running these checks we establish a historical baseline that we ## Adding an Experiment -You can read more about the workload requirements [here](https://github.com/DataDog/datadog-agent/blob/main/test/workload-checks/README.md). +You can read more about the workload requirements [here](https://datadoghq.atlassian.net/wiki/spaces/SMP/pages/3183248544/Workload+Checks+-+Getting+Started). diff --git a/workload-checks/typical/cases/http_text_to_http_json/experiment.yaml b/workload-checks/typical/cases/http_text_to_http_json/experiment.yaml index 3e3c2ba73b7c7..aa561ba19f031 100644 --- a/workload-checks/typical/cases/http_text_to_http_json/experiment.yaml +++ b/workload-checks/typical/cases/http_text_to_http_json/experiment.yaml @@ -11,12 +11,12 @@ checks: bounds: series: rss_bytes # The machine has 12Gb free. - upper_bound: 3.5Gb + upper_bound: 2Gb - name: cpu_utilization description: "CPU utilization" bounds: series: cpu_percentage # The machine has 8 cores available. - lower_bound: 400 - upper_bound: 800 + lower_bound: 0 + upper_bound: 600 diff --git a/workload-checks/typical/cases/http_text_to_http_json/lading/lading.yaml b/workload-checks/typical/cases/http_text_to_http_json/lading/lading.yaml index 24b27c1e26abb..d176da51bb336 100644 --- a/workload-checks/typical/cases/http_text_to_http_json/lading/lading.yaml +++ b/workload-checks/typical/cases/http_text_to_http_json/lading/lading.yaml @@ -4,7 +4,7 @@ generator: 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131] headers: {} target_uri: "http://localhost:8282/" - bytes_per_second: "500 Mb" + bytes_per_second: "250 Mb" parallel_connections: 10 method: post: From 48b87c0fb940e7e4679d0c59272386a43e5383e9 Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Tue, 26 Sep 2023 17:05:51 -0400 Subject: [PATCH 14/15] delete pull_request trigger --- .github/workflows/workload_checks.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/workload_checks.yml b/.github/workflows/workload_checks.yml index 6b749d978aa55..b26f064709db3 100644 --- a/.github/workflows/workload_checks.yml +++ b/.github/workflows/workload_checks.yml @@ -18,10 +18,6 @@ on: workflow_dispatch: schedule: - cron: '0 4 * * *' - pull_request: #TODO delete this before merging - types: - - opened - - synchronize env: SINGLE_MACHINE_PERFORMANCE_API: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_API }} From 8768a24341a8bda07210a2fc191648207e13f1af Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Tue, 26 Sep 2023 17:34:33 -0400 Subject: [PATCH 15/15] changed schedule, no weekends --- .github/workflows/workload_checks.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/workload_checks.yml b/.github/workflows/workload_checks.yml index b26f064709db3..3610c78bdfa15 100644 --- a/.github/workflows/workload_checks.yml +++ b/.github/workflows/workload_checks.yml @@ -3,7 +3,7 @@ # Runs Vector Workload Checks. # # Runs on: -# - scheduled daily 04:00 UTC +# - scheduled UTC midnight Tues-Sat # This workflow runs the collection of our workload checks, using the latest Vector nightly image, # which depends on when the workflow is invoked. @@ -17,7 +17,8 @@ on: workflow_call: workflow_dispatch: schedule: - - cron: '0 4 * * *' + # At midnight UTC Tue-Sat + - cron: '0 0 * * 2-6' env: SINGLE_MACHINE_PERFORMANCE_API: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_API }}