Skip to content

Commit

Permalink
Merge branch 'inject-build-pgo-profile' into update-pgo-1726276668
Browse files Browse the repository at this point in the history
  • Loading branch information
1pkg committed Sep 14, 2024
2 parents a606033 + 7bd059a commit 2b58bf3
Show file tree
Hide file tree
Showing 27 changed files with 739 additions and 139 deletions.
77 changes: 72 additions & 5 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ name: benchmarks
on:
workflow_dispatch:
inputs:
runStandalone:
description: 'Run the benchmarks against standalone APM Server with Moxy'
required: false
type: boolean
default: false
profile:
description: 'The system profile used to run the benchmarks'
required: false
Expand All @@ -25,6 +30,7 @@ on:

env:
PNG_REPORT_FILE: out.png
BENCHMARK_CPU_OUT: default.pgo
BENCHMARK_RESULT: benchmark-result.txt
WORKING_DIRECTORY: testing/benchmark

Expand All @@ -38,12 +44,14 @@ jobs:
run:
working-directory: ${{ env.WORKING_DIRECTORY }}
permissions:
contents: read
contents: write
id-token: write
pull-requests: write
env:
SSH_KEY: ./id_rsa_terraform
TF_VAR_private_key: ./id_rsa_terraform
TF_VAR_public_key: ./id_rsa_terraform.pub
TF_VAR_run_standalone: ${{ inputs.runStandalone }}
TFVARS_SOURCE: ${{ inputs.profile || 'system-profiles/8GBx1zone.tfvars' }} # // Default to use an 8gb profile
TF_VAR_BUILD_ID: ${{ github.run_id }}
TF_VAR_ENVIRONMENT: ci
Expand All @@ -52,6 +60,10 @@ jobs:
GOBENCH_PASSWORD: ${{ secrets.GOBENCH_PASSWORD }}
GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }}
GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }}
# temporarily override to get faster feedback
BENCHMARK_WARMUP_TIME: 1m
BENCHMARK_COUNT: 2
BENCHMARK_TIME: 1m
steps:
- uses: actions/checkout@v4

Expand Down Expand Up @@ -104,8 +116,14 @@ jobs:
- name: Build apmbench
run: make apmbench $SSH_KEY terraform.tfvars

- name: Build APM Server and Moxy
if: ${{ inputs.runStandalone }}
run: |
make moxy
make apm-server
- name: Override docker committed version
if: ${{ ! inputs.runOnStable }}
if: ${{ ! inputs.runOnStable && ! inputs.runStandalone }}
run: make docker-override-committed-version

- name: Spin up benchmark environment
Expand All @@ -118,13 +136,24 @@ jobs:
- name: Run benchmarks autotuned
if: ${{ inputs.benchmarkAgents == '' }}
run: make run-benchmark-autotuned index-benchmark-results
run: make run-benchmark-autotuned

- name: Run benchmarks self tuned
if: ${{ inputs.benchmarkAgents != '' }}
run: make run-benchmark index-benchmark-results
run: make run-benchmark

- name: Cat standalone server logs
if: ${{ inputs.runStandalone && failure() }}
run: make cat-apm-server-logs

# Results are only indexed and uploaded if the run happens on the main branch.

- name: Index benchmarks result
# if: github.ref == 'refs/heads/main'
run: make index-benchmark-results

- name: Download PNG
# if: github.ref == 'refs/heads/main'
run: >-
${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh
${{ secrets.KIBANA_BENCH_ENDPOINT }}
Expand All @@ -133,13 +162,15 @@ jobs:
$PNG_REPORT_FILE
- name: Upload PNG
# if: github.ref == 'refs/heads/main'
uses: actions/upload-artifact@v4
with:
name: kibana-png-report
path: ${{ env.WORKING_DIRECTORY }}/${{ env.PNG_REPORT_FILE }}
if-no-files-found: error

- name: Upload PNG to AWS S3
# if: github.ref == 'refs/heads/main'
id: s3-upload-png
env:
AWS_DEFAULT_REGION: us-east-1
Expand All @@ -149,13 +180,49 @@ jobs:
echo "png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT"
- name: Upload benchmark result
# if: github.ref == 'refs/heads/main'
uses: actions/upload-artifact@v4
if: always()
with:
name: benchmark-result
path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }}
if-no-files-found: error

# The next section injects CPU profile collected by apmbench into the build.
# The profile is copied, uploaded to the workflow artifacts, and then
# pushed via a PR that updates default.pgo.

- name: Copy CPU profile
if: ${{ inputs.runStandalone }}
run: make cp-cpuprof

- name: Upload CPU profile
if: ${{ inputs.runStandalone }}
uses: actions/upload-artifact@v4
with:
name: cpu-profile
path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }}
if-no-files-found: error

- name: Open PGO PR
if: ${{ inputs.runStandalone }}
run: |
cd "${{ github.workspace }}"
mv "$PROFILE_PATH" x-pack/apm-server/default.pgo
git config user.email "apm@elastic.co"
git config user.name "APM Server"
git fetch origin main
git checkout main
BRANCH="update-pgo-$(date +%s)"
git checkout -b "$BRANCH"
git add x-pack/apm-server/default.pgo
git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW."
git push -u origin "$BRANCH"
gh pr create -B main -H "$BRANCH" -t "PGO: Update default.pgo" -b "Update default.pgo CPU profile from the benchmarks [workflow]($WORKFLOW)." -R elastic/apm-server
env:
PROFILE_PATH: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
WORKFLOW: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/attempts/${{ github.run_attempt }}

- name: Tear down benchmark environment
if: always()
run: make destroy
Expand Down
2 changes: 1 addition & 1 deletion systemtest/benchtest/profiles.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func (p *profiles) recordCPU() error {
if benchConfig.CPUProfile == "" {
return nil
}
duration := 2 * benchConfig.Benchtime
duration := benchConfig.Benchtime
profile, err := fetchProfile("/debug/pprof/profile", duration)
if err != nil {
return fmt.Errorf("failed to fetch CPU profile: %w", err)
Expand Down
1 change: 1 addition & 0 deletions systemtest/cmd/moxy/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
moxy
10 changes: 10 additions & 0 deletions systemtest/cmd/moxy/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module moxy

go 1.22.5

require (
github.com/klauspost/compress v1.17.9
go.uber.org/zap v1.27.0
)

require go.uber.org/multierr v1.10.0 // indirect
16 changes: 16 additions & 0 deletions systemtest/cmd/moxy/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ=
go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
154 changes: 154 additions & 0 deletions systemtest/cmd/moxy/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package main

import (
"bufio"
"bytes"
"encoding/base64"
"flag"
"fmt"
"io"
"net/http"
"sync"

"github.com/klauspost/compress/gzip"
"github.com/klauspost/compress/zstd"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)

var memPool = sync.Pool{
New: func() interface{} {
return new(bytes.Buffer)
},
}

// main runs moxy, a mock Elasticsearch listening on :9200 that APM Server
// can index into during benchmarks.
//
// Flags:
//
//	-loglevel  zap log level (default INFO)
//	-username  Basic-auth username the /_bulk endpoint expects (default "elastic")
//	-password  Basic-auth password the /_bulk endpoint expects (required)
func main() {
	logLevel := zap.LevelFlag(
		"loglevel", zapcore.InfoLevel,
		"set log level to one of: DEBUG, INFO (default), WARN, ERROR, DPANIC, PANIC, FATAL",
	)
	username := flag.String("username", "elastic", "authentication username to mimic ES")
	// Fixed help text: it previously said "username" for the password flag.
	password := flag.String("password", "", "authentication password to mimic ES")
	flag.Parse()

	zapcfg := zap.NewProductionConfig()
	zapcfg.EncoderConfig.EncodeTime = zapcore.RFC3339TimeEncoder
	zapcfg.EncoderConfig.EncodeLevel = zapcore.CapitalColorLevelEncoder
	zapcfg.Encoding = "console"
	zapcfg.Level = zap.NewAtomicLevelAt(*logLevel)
	logger, err := zapcfg.Build()
	if err != nil {
		panic(err)
	}
	// Register the flush before any logger.Fatal path so buffered output is
	// not lost. (Fatal exits the process; deferred calls registered after it
	// would never run.)
	defer logger.Sync()
	if *username == "" || *password == "" {
		logger.Fatal("both username and password are required")
	}
	s := http.Server{
		Addr:    ":9200",
		Handler: handler(logger, *username, *password),
	}
	// ListenAndServe always returns a non-nil error when it stops.
	if err := s.ListenAndServe(); err != nil {
		logger.Fatal("listen error", zap.Error(err))
	}
}

// handler returns an http.Handler mimicking the minimal subset of the
// Elasticsearch HTTP API that APM Server uses: the root product-info
// endpoint, the has_privileges check, and _bulk ingestion.
//
// /_bulk requests must carry HTTP Basic credentials matching
// username/password and may be gzip- or zstd-compressed. Document bodies
// are not decoded: every item is acknowledged with a synthetic
// {"create":{"status":201}} entry.
func handler(logger *zap.Logger, username, password string) http.Handler {
	expectedAuth := fmt.Sprintf("%s:%s", username, password)
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("X-Elastic-Product", "Elasticsearch")
		switch r.URL.Path {
		case "/":
			// Static cluster-info response so clients accept us as ES 8.x.
			w.Write([]byte(`{
  "name": "instance-0000000001",
  "cluster_name": "eca3b3c3bbee4816bb92f82184e328dd",
  "cluster_uuid": "cc49813b6b8e2138fbb8243ae2b3deed",
  "version": {
    "number": "8.15.1",
    "build_flavor": "default",
    "build_type": "docker",
    "build_hash": "253e8544a65ad44581194068936f2a5d57c2c051",
    "build_date": "2024-09-02T22:04:47.310170297Z",
    "build_snapshot": false,
    "lucene_version": "9.11.1",
    "minimum_wire_compatibility_version": "7.17.0",
    "minimum_index_compatibility_version": "7.0.0"
  },
  "tagline": "You Know, for Search"
}`))
			return
		case "/_security/user/_has_privileges":
			w.Write([]byte(`{"username":"admin","has_all_requested":true,"cluster":{},"index":{},"application":{"apm":{"-":{"event:write":true}}}}`))
		case "/_bulk":
			// The Authorization header has the form "Basic <base64(user:pass)>".
			// Strip the scheme prefix before decoding: base64-decoding the whole
			// header value (as the previous version did) always fails on the
			// space in "Basic ", rejecting every request with 401.
			const basicPrefix = "Basic "
			auth := r.Header.Get("Authorization")
			if len(auth) <= len(basicPrefix) || auth[:len(basicPrefix)] != basicPrefix {
				logger.Error("authentication failed: malformed Authorization header")
				w.WriteHeader(http.StatusUnauthorized)
				return
			}
			actualAuth, err := base64.StdEncoding.DecodeString(auth[len(basicPrefix):])
			if err != nil || string(actualAuth) != expectedAuth {
				logger.Error(
					"authentication failed",
					zap.Error(err),
					zap.String("actual", string(actualAuth)),
					zap.String("expected", expectedAuth),
				)
				w.WriteHeader(http.StatusUnauthorized)
				return
			}

			// Transparently decompress the request body if needed.
			var body io.Reader
			switch r.Header.Get("Content-Encoding") {
			case "gzip":
				gr, err := gzip.NewReader(r.Body)
				if err != nil {
					logger.Error("gzip reader err", zap.Error(err))
					http.Error(w, fmt.Sprintf("reader error: %v", err), http.StatusInternalServerError)
					return
				}
				defer gr.Close()
				body = gr
			case "zstd":
				zr, err := zstd.NewReader(r.Body)
				if err != nil {
					logger.Error("zstd reader err", zap.Error(err))
					http.Error(w, fmt.Sprintf("reader error: %v", err), http.StatusInternalServerError)
					return
				}
				defer zr.Close()
				body = zr
			default:
				body = r.Body
			}

			// Build the response in a pooled buffer to avoid per-request
			// allocations on this hot path.
			jsonw := memPool.Get().(*bytes.Buffer)
			defer func() {
				jsonw.Reset()
				memPool.Put(jsonw)
			}()

			jsonw.Write([]byte(`{"items":[`))
			scanner := bufio.NewScanner(body)
			// The default Scanner limit is 64 KiB per line; bulk documents can
			// exceed that, which would surface as bufio.ErrTooLong. Allow up to
			// 10 MiB per NDJSON line.
			scanner.Buffer(make([]byte, 0, 64*1024), 10*1024*1024)
			first := true
			for scanner.Scan() {
				// The first line of each pair is the action, always "create";
				// skip decoding it and consume the source line that follows.
				if !scanner.Scan() {
					logger.Error("unexpected payload")
					http.Error(w, "expected source", http.StatusInternalServerError)
					return
				}
				if first {
					first = false
				} else {
					jsonw.WriteByte(',')
				}
				jsonw.Write([]byte(`{"create":{"status":201}}`))
			}
			if err := scanner.Err(); err != nil {
				logger.Error("scanner error", zap.Error(err))
				http.Error(w, fmt.Sprintf("scanner error: %v", err), http.StatusInternalServerError)
			} else {
				jsonw.Write([]byte(`]}`))
				w.Write(jsonw.Bytes())
			}
			// TODO additionally report events throughput metric here, to index into benchmarks.
		default:
			logger.Error("unknown path", zap.String("path", r.URL.Path))
		}
	})
}
Loading

0 comments on commit 2b58bf3

Please sign in to comment.