diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml
index 899cae2b8658..d038f64f15b0 100644
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -56,15 +56,26 @@ concurrency:
 jobs:
   bench:
     if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
+    strategy:
+      matrix:
+        include:
+          - DEFAULT_PG_VERSION: 14
+            PLATFORM: "neon-staging"
+            region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
+            provisioner: 'k8s-pod'
+          - DEFAULT_PG_VERSION: 16
+            PLATFORM: "azure-staging"
+            region_id: 'azure-eastus2'
+            provisioner: 'k8s-neonvm'
     env:
       TEST_PG_BENCH_DURATIONS_MATRIX: "300"
       TEST_PG_BENCH_SCALES_MATRIX: "10,100"
       POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-      DEFAULT_PG_VERSION: 14
+      DEFAULT_PG_VERSION: ${{ matrix.DEFAULT_PG_VERSION }}
       TEST_OUTPUT: /tmp/test_output
       BUILD_TYPE: remote
       SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
-      PLATFORM: "neon-staging"
+      PLATFORM: ${{ matrix.PLATFORM }}
 
     runs-on: [ self-hosted, us-east-2, x64 ]
     container:
@@ -85,9 +96,10 @@ jobs:
         id: create-neon-project
         uses: ./.github/actions/neon-project-create
         with:
-          region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
+          region_id: ${{ matrix.region_id }}
           postgres_version: ${{ env.DEFAULT_PG_VERSION }}
           api_key: ${{ secrets.NEON_STAGING_API_KEY }}
+          provisioner: ${{ matrix.provisioner }}
 
       - name: Run benchmark
         uses: ./.github/actions/run-python-test-set
@@ -96,13 +108,14 @@
           test_selection: performance
           run_in_parallel: false
           save_perf_report: ${{ env.SAVE_PERF_REPORT }}
+          pg_version: ${{ env.DEFAULT_PG_VERSION }}
           # Set --sparse-ordering option of pytest-order plugin
           # to ensure tests are running in order of appears in the file.
           # It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
           extra_params: -m remote_cluster --sparse-ordering
-            --timeout 5400
+            --timeout 14400
             --ignore test_runner/performance/test_perf_olap.py
             --ignore test_runner/performance/test_perf_pgvector_queries.py
             --ignore test_runner/performance/test_logical_replication.py
diff --git a/test_runner/performance/test_hot_page.py b/test_runner/performance/test_hot_page.py
index d9785dd87e04..5e97c7cddf13 100644
--- a/test_runner/performance/test_hot_page.py
+++ b/test_runner/performance/test_hot_page.py
@@ -16,20 +16,34 @@
 )
 def test_hot_page(env: PgCompare):
     # Update the same page many times, then measure read performance
-    num_writes = 1000000
 
     with closing(env.pg.connect()) as conn:
         with conn.cursor() as cur:
             cur.execute("drop table if exists t, f;")
+            num_writes = 1000000
 
-            # Write many updates to the same row
+            # Use a PL/pgSQL block to perform many updates to the same row
+            # without depending on the latency between the database client
+            # and the postgres server.
+            # - however, a single statement should not run into a timeout, so we increase it
+            cur.execute("SET statement_timeout = '4h';")
             with env.record_duration("write"):
-                cur.execute("create table t (i integer);")
-                cur.execute("insert into t values (0);")
-                for i in range(num_writes):
-                    cur.execute(f"update t set i = {i};")
+                cur.execute(
+                    f"""
+                    DO $$
+                    BEGIN
+                        create table t (i integer);
+                        insert into t values (0);
 
-            # Write 3-4 MB to evict t from compute cache
+                        FOR j IN 1..{num_writes} LOOP
+                            update t set i = j;
+                        END LOOP;
+                    END $$;
+                    """
+                )
+
+            # Write ~350 MB to evict t from compute shared buffers (128 MB)
+            # however, it will still be in the LFC, so I do not really understand the point of this test
             cur.execute("create table f (i integer);")
-            cur.execute("insert into f values (generate_series(1,100000));")
+            cur.execute("insert into f values (generate_series(1,10000000));")
diff --git a/test_runner/performance/test_hot_table.py b/test_runner/performance/test_hot_table.py
index 5fcffc8afb7a..9a78c92ec0e1 100644
--- a/test_runner/performance/test_hot_table.py
+++ b/test_runner/performance/test_hot_table.py
@@ -16,8 +16,8 @@
 )
 def test_hot_table(env: PgCompare):
     # Update a small table many times, then measure read performance
-    num_rows = 100000  # Slightly larger than shared buffers size TODO validate
-    num_writes = 1000000
+    num_rows = 100000  # initial table size is only about 4 MB
+    num_writes = 10000000  # writes approximately 349 MB of blocks, > 128 MB shared_buffers
     num_reads = 10
 
     with closing(env.pg.connect()) as conn:
@@ -28,8 +28,21 @@ def test_hot_table(env: PgCompare):
         with env.record_duration("write"):
             cur.execute("create table t (i integer primary key);")
             cur.execute(f"insert into t values (generate_series(1,{num_rows}));")
-            for i in range(num_writes):
-                cur.execute(f"update t set i = {i + num_rows} WHERE i = {i};")
+            # PL/pgSQL block to perform the updates (and avoid latency between client and server)
+            # - however, a single statement should not run into a timeout, so we increase it
+            cur.execute("SET statement_timeout = '4h';")
+            cur.execute(
+                f"""
+                DO $$
+                DECLARE
+                    r integer := {num_rows};
+                BEGIN
+                    FOR j IN 1..{num_writes} LOOP
+                        UPDATE t SET i = j + r WHERE i = j;
+                    END LOOP;
+                END $$;
+                """
+            )
 
         # Read the table
         with env.record_duration("read"):
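
Note on the pattern used in both test files: a client-side loop issuing one UPDATE per iteration is replaced by a single server-side PL/pgSQL DO block, so the recorded "write" duration measures Postgres work rather than one client/server round trip per UPDATE, and statement_timeout is raised because the whole loop now runs as one long statement. Below is a minimal standalone sketch of the same pattern, assuming psycopg2 and a reachable scratch database; the DSN and row count are illustrative placeholders, not values from this PR.

import psycopg2

NUM_WRITES = 1_000_000  # illustrative magnitude, similar to these tests

# Placeholder DSN; point this at a throwaway database before running.
conn = psycopg2.connect("dbname=scratch")
conn.autocommit = True
with conn.cursor() as cur:
    cur.execute("drop table if exists t;")
    cur.execute("create table t (i integer);")
    cur.execute("insert into t values (0);")
    # The loop below executes as one long statement, so raise the
    # timeout first, as the tests above do.
    cur.execute("SET statement_timeout = '4h';")
    # One network round trip: the FOR loop runs entirely inside Postgres.
    cur.execute(
        f"""
        DO $$
        BEGIN
            FOR j IN 1..{NUM_WRITES} LOOP
                UPDATE t SET i = j;
            END LOOP;
        END $$;
        """
    )
conn.close()

The trade-off is that per-iteration timing is no longer visible to the client; only the total duration of the DO block can be observed, which is what env.record_duration wraps in these tests.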