diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 7445501f00e8..a9f63ac92f83 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -443,7 +443,7 @@ jobs:
     container:
       image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${{ needs.build-buildtools-image.outputs.build-tools-tag }}
       # for changed limits, see comments on `options:` earlier in this file
-      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
+      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864 --cgroupns=private --security-opt umask=/sys/fs/cgroup
     strategy:
       fail-fast: false
       matrix:
@@ -456,6 +456,9 @@
          submodules: true
          fetch-depth: 1
 
+      - name: Setup cgroup for use by test suite
+        run: sudo bash -x /setup_neon_testsuite_cgroup.bash
+
      - name: Pytest regression tests
        uses: ./.github/actions/run-python-test-set
        with:
@@ -472,6 +475,7 @@
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
          BUILD_TAG: ${{ needs.tag.outputs.build-tag }}
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
+         NEON_TEST_SUITE_USE_CGROUPS: /sys/fs/cgroup/neon_testsuite
 
      - name: Merge and upload coverage data
        if: matrix.build_type == 'debug' && matrix.pg_version == 'v14'
diff --git a/Dockerfile.buildtools b/Dockerfile.buildtools
index 213aed167980..add46eca26d0 100644
--- a/Dockerfile.buildtools
+++ b/Dockerfile.buildtools
@@ -1,8 +1,5 @@
 FROM debian:bullseye-slim
 
-# Add nonroot user
-RUN useradd -ms /bin/bash nonroot -b /home
-SHELL ["/bin/bash", "-c"]
 
 # System deps
 RUN set -e \
@@ -43,6 +40,7 @@ RUN set -e \
    openssh-client \
    parallel \
    pkg-config \
+   sudo \
    unzip \
    wget \
    xz-utils \
@@ -50,6 +48,17 @@ RUN set -e \
    zstd \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
+# Add nonroot user
+RUN useradd -ms /bin/bash nonroot -b /home
+SHELL ["/bin/bash", "-c"]
+RUN echo "#!/usr/bin/env bash \
+set -exuo pipefail \
+mkdir /sys/fs/cgroup/neon_testsuite \
+chown -R nonroot:nonroot /sys/fs/cgroup/neon_testsuite \
+echo SUCCESS: cgroup set up for user nonroot at /sys/fs/cgroup/neon_testsuite \
+" > /setup_neon_testsuite_cgroup.bash && chmod +x /setup_neon_testsuite_cgroup.bash
+RUN echo "ALL ALL = (ALL) NOPASSWD: /setup_neon_testsuite_cgroup.bash" >> /etc/sudoers
+
 # protobuf-compiler (protoc)
 ENV PROTOC_VERSION 25.1
 RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
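The two hunks above wire a writable cgroup v2 directory into the CI container: the workflow gives the job a private cgroup namespace, the image gains a root-only /setup_neon_testsuite_cgroup.bash (invokable by the unprivileged user through the sudoers entry) that creates /sys/fs/cgroup/neon_testsuite owned by nonroot, and the test suite is pointed at that directory via NEON_TEST_SUITE_USE_CGROUPS. What follows is a minimal standalone sketch, not part of the patch, of how a process can check that such a delegated cgroup is actually usable before relying on it; the default path is simply the one this PR uses.

#!/usr/bin/env python3
# Standalone sanity check for a delegated cgroup v2 directory (illustration only).
import os
import subprocess
from pathlib import Path

cgroup = Path(os.environ.get("NEON_TEST_SUITE_USE_CGROUPS", "/sys/fs/cgroup/neon_testsuite"))

# The directory must exist and live on a cgroup v2 mount, not a legacy v1 hierarchy.
assert cgroup.is_dir(), f"{cgroup} does not exist; was the setup script run?"
fstype = subprocess.check_output(
    ["stat", "--file-system", "--format=%T", str(cgroup)], text=True
).strip()
assert fstype == "cgroup2fs", f"expected cgroup2fs, got {fstype}"

# Delegation means the unprivileged user can manage child cgroups underneath it.
probe = cgroup / "delegation_probe"
probe.mkdir()
assert (probe / "cgroup.procs").read_text() == ""  # a fresh cgroup holds no processes
probe.rmdir()  # an empty cgroup can be removed again
print(f"OK: {cgroup} is a usable delegated cgroup v2 directory")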
diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs
index d3b0366d313e..dcad22b9925f 100644
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -438,7 +438,7 @@ impl Endpoint {
     }
 
     fn wait_for_compute_ctl_to_exit(&self, send_sigterm: bool) -> Result<()> {
-        // TODO use background_process::stop_process instead
+        // TODO use background_process::stop_process instead: https://github.com/neondatabase/neon/pull/6482
         let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
         let pid: u32 = std::fs::read_to_string(pidfile_path)?.parse()?;
         let pid = nix::unistd::Pid::from_raw(pid as i32);
@@ -583,9 +583,21 @@ impl Endpoint {
         }
 
         let child = cmd.spawn()?;
+        // set up a scopeguard to kill & wait for the child in case we panic or bail below
+        let child = scopeguard::guard(child, |mut child| {
+            println!("SIGKILL & wait the started process");
+            (|| {
+                // TODO: use another signal that can be caught by the child so it can clean up any children it spawned
+                child.kill().context("SIGKILL child")?;
+                child.wait().context("wait() for child process")?;
+                anyhow::Ok(())
+            })()
+            .with_context(|| format!("scopeguard kill&wait child {child:?}"))
+            .unwrap();
+        });
 
         // Write down the pid so we can wait for it when we want to stop
-        // TODO use background_process::start_process instead
+        // TODO use background_process::start_process instead: https://github.com/neondatabase/neon/pull/6482
         let pid = child.id();
         let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
         std::fs::write(pidfile_path, pid.to_string())?;
@@ -634,6 +646,9 @@
             std::thread::sleep(ATTEMPT_INTERVAL);
         }
 
+        // disarm the scopeguard, let the child outlive this function (and neon_local invocation)
+        drop(scopeguard::ScopeGuard::into_inner(child));
+
         Ok(())
     }
 
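The endpoint.rs change guards the freshly spawned compute_ctl with a start-then-disarm pattern: if start() errors out or panics anywhere between spawning and readiness, the scopeguard SIGKILLs and reaps the child so neon_local never leaks it; once startup succeeds the guard is disarmed and the child deliberately outlives the process. A rough Python rendering of the same idea — not part of the patch, and with a made-up readiness probe — looks like this:

import subprocess
import time

def wait_until_ready(child: subprocess.Popen, timeout: float = 10.0) -> None:
    # Hypothetical readiness probe: a real one would poll a pidfile, port, or log line.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if child.poll() is not None:
            raise RuntimeError(f"child exited early with code {child.returncode}")
        return  # pretend the child is ready after the first successful poll
    raise TimeoutError("child did not become ready in time")

def start_with_cleanup_on_error(cmd: list[str]) -> subprocess.Popen:
    """Spawn a long-lived child; kill and reap it if anything between spawn and
    readiness fails, otherwise 'disarm' the cleanup and let it outlive us."""
    child = subprocess.Popen(cmd)
    try:
        wait_until_ready(child)
    except BaseException:
        # counterpart of the Rust scopeguard body: SIGKILL + wait, never leak the child
        child.kill()
        child.wait()
        raise
    return child  # reaching here is the equivalent of ScopeGuard::into_inner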
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 142c97d5c339..4da9cbd3f9cd 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -2,11 +2,13 @@
 import abc
 import asyncio
+import errno
 import filecmp
 import json
 import os
 import re
 import shutil
+import signal
 import subprocess
 import tempfile
 import textwrap
@@ -173,6 +175,35 @@ def versioned_pg_distrib_dir(pg_distrib_dir: Path, pg_version: PgVersion) -> Ite
     yield versioned_dir
 
 
+@pytest.fixture(scope="function")
+def test_cgroup_dir(request: FixtureRequest) -> Optional[Path]:
+    ## Use like so:
+    # systemd-run --user --shell
+    # cat /proc/self/cgroup
+    # # will look like so: 0::/user.slice/user-1000.slice/user@1000.service/app.slice/run-u5.service
+    # env \
+    #   NEON_TEST_SUITE_USE_CGROUPS=/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/app.slice/run-u5.service \
+    #   BUILD_TYPE=debug \
+    #   DEFAULT_PG_VERSION=15 \
+    #   ./scripts/pytest
+
+    var = os.getenv("NEON_TEST_SUITE_USE_CGROUPS")
+    if var is None:
+        return None
+    root = Path(var)
+    assert root.is_dir()
+    assert (
+        "cgroup2fs"
+        == subprocess.check_output(
+            ["stat", "--file-system", "--format=%T", root],
+            text=True,
+        ).strip()
+    )
+    test_cgroup_dir: Path = get_test_cgroup_dir(request, root)
+    test_cgroup_dir.mkdir(exist_ok=False, parents=False)
+    return test_cgroup_dir
+
+
 def shareable_scope(fixture_name: str, config: Config) -> Literal["session", "function"]:
     """Return either session of function scope, depending on TEST_SHARED_FIXTURES envvar.
@@ -447,6 +478,7 @@ def __init__(
         initial_tenant: Optional[TenantId] = None,
         initial_timeline: Optional[TimelineId] = None,
         pageserver_virtual_file_io_engine: Optional[str] = None,
+        test_cgroup_dir: Optional[Path] = None,
     ):
         self.repo_dir = repo_dir
         self.rust_log_override = rust_log_override
@@ -483,6 +515,7 @@ def __init__(
         self.top_output_dir = top_output_dir
 
         self.pageserver_virtual_file_io_engine: Optional[str] = pageserver_virtual_file_io_engine
+        self.test_cgroup_dir = test_cgroup_dir
 
         assert test_name.startswith(
             "test_"
@@ -876,6 +909,15 @@ def cleanup_remote_storage(self):
             x.do_cleanup()
 
     def __enter__(self) -> "NeonEnvBuilder":
+        if self.test_cgroup_dir is not None:
+            log.info(f"putting test runner into cgroup {self.test_cgroup_dir}")
+            procs_file = self.test_cgroup_dir / "cgroup.procs"
+            # TODO: auto-cleanup the cgroup, share code with __exit__
+            pids = procs_file.read_text().split()
+            assert pids == []
+            mypid = os.getpid()
+            with open(procs_file, "a") as f:
+                f.write(f" {mypid}\n")
         return self
 
     def __exit__(
@@ -929,6 +971,57 @@ def __exit__(
             if cleanup_error is not None:
                 cleanup_error = e
 
+        if self.test_cgroup_dir is not None:
+            # TODO: ensure that we're the only python thread running;
+            # otherwise, if a test leaks a thread,
+            # our checking here would race with that other thread.
+            # => https://github.com/neondatabase/neon/issues/6486
+            log.info(f"check test runner cgroup for leaked processes: {self.test_cgroup_dir}")
+            procs_file = self.test_cgroup_dir / "cgroup.procs"
+            mypid = os.getpid()
+            # move ourselves out of cgroup
+            with open(self.test_cgroup_dir.parent / "cgroup.procs", "a") as f:
+                f.write(f"{mypid}")
+            # now freeze the test cgroup, so we can race-free list & SIGKILL the processes
+            freeze_file = self.test_cgroup_dir / "cgroup.freeze"
+            freeze_file.write_text("1")
+            # inspect remaining pids
+            pids = [int(pid) for pid in procs_file.read_text().split()]
+            had_leaked_pids = len(pids) > 0
+            for pid in pids:
+                # dump info
+                try:
+                    args_file = Path(f"/proc/{pid}/cmdline").read_bytes()
+                    args_parsed = [
+                        bytes.decode("utf-8") for bytes in args_file.split(b"\x00")
+                    ]  # NULL-byte separated
+                    args = f"{args_parsed}"
+                except Exception as e:
+                    args = f"cmdline unavailable, {type(e)}, {e}"
+                log.warning(f"SIGKILLing leaked process: {pid}: {args}")
+                # send SIGKILL to the frozen process
+                os.kill(pid, signal.SIGKILL)
+            # thaw the cgroup
+            freeze_file.write_text("0")
+            # wait for processes to exit
+            while True:
+                try:
+                    self.test_cgroup_dir.rmdir()
+                    break
+                except OSError as e:
+                    if e.errno != errno.EBUSY:
+                        raise
+                pids = [int(pid) for pid in procs_file.read_text().split()]
+                if len(pids) == 0:
+                    break
+                log.warning(f"waiting for pids to exit: {pids}")
+                time.sleep(0.1)
+            log.info("all pids exited and test cgroup has been deleted")
+            cleanup_cmd = (
+                f"for pid in $(cat '{procs_file}'); do kill -9 $pid; done; rmdir '{self.test_cgroup_dir}'"
+            )
+            assert not had_leaked_pids, f"had pids in test cgroup after NeonEnvBuilder teardown, see logs for details & cleanup help\ncleanup using bash command: {cleanup_cmd}\n"
+
 
 class NeonEnv:
     """
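The teardown above freezes the test cgroup so the pid listing cannot race with concurrent forks, SIGKILLs everything it finds, thaws, and then polls until the cgroup directory can be removed. On kernels that provide cgroup.kill (cgroup v2, Linux 5.14 and later) the freeze/kill/thaw dance can be collapsed into a single write; the sketch below is an alternative teardown under that kernel assumption, not what the fixture above does:

import errno
import time
from pathlib import Path

def kill_cgroup_and_wait(test_cgroup_dir: Path, poll_interval: float = 0.1) -> list[int]:
    """Kill every process in a cgroup v2 group and wait until the group can be removed.
    Returns the pids that were present when we started, for error reporting."""
    leaked = [int(pid) for pid in (test_cgroup_dir / "cgroup.procs").read_text().split()]
    # cgroup.kill (Linux >= 5.14) SIGKILLs the whole subtree atomically, including
    # anything that forks while we are reading cgroup.procs above.
    (test_cgroup_dir / "cgroup.kill").write_text("1")
    while True:
        try:
            test_cgroup_dir.rmdir()  # succeeds once the last member has exited
            return leaked
        except OSError as e:
            if e.errno != errno.EBUSY:
                raise
        time.sleep(poll_interval)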
@@ -1200,6 +1293,7 @@ def _shared_simple_env(
     pg_distrib_dir: Path,
     pg_version: PgVersion,
     pageserver_virtual_file_io_engine: str,
+    test_cgroup_dir: Optional[Path],
 ) -> Iterator[NeonEnv]:
     """
     # Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES
@@ -1230,6 +1324,7 @@ def _shared_simple_env(
             test_name=request.node.name,
             test_output_dir=test_output_dir,
             pageserver_virtual_file_io_engine=pageserver_virtual_file_io_engine,
+            test_cgroup_dir=test_cgroup_dir,
         ) as builder:
             env = builder.init_start()
 
@@ -1269,6 +1364,7 @@ def neon_env_builder(
     test_overlay_dir: Path,
     top_output_dir: Path,
     pageserver_virtual_file_io_engine: str,
+    test_cgroup_dir: Optional[Path],
 ) -> Iterator[NeonEnvBuilder]:
     """
     Fixture to create a Neon environment for test.
@@ -1302,6 +1398,7 @@ def neon_env_builder(
         test_name=request.node.name,
         test_output_dir=test_output_dir,
         test_overlay_dir=test_overlay_dir,
+        test_cgroup_dir=test_cgroup_dir,
     ) as builder:
         yield builder
 
@@ -3636,6 +3733,10 @@ def get_test_repo_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
     return get_test_output_dir(request, top_output_dir) / "repo"
 
 
+def get_test_cgroup_dir(request: FixtureRequest, top_cgroup_dir: Path) -> Path:
+    return _get_test_dir(request, top_cgroup_dir, "")
+
+
 def pytest_addoption(parser: Parser):
     parser.addoption(
         "--preserve-database-files",
diff --git a/test_runner/regress/test_import.py b/test_runner/regress/test_import.py
index faedf5d9440b..30092fdc6e98 100644
--- a/test_runner/regress/test_import.py
+++ b/test_runner/regress/test_import.py
@@ -24,7 +24,8 @@
 from fixtures.utils import subprocess_capture
 
 
-def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_builder):
+# fixture order matters here: neon_env_builder must come before vanilla_pg so that vanilla_pg is torn down first and its postgres does not trip the leaked-process assertions in NeonEnvBuilder.__exit__
+def test_import_from_vanilla(test_output_dir, pg_bin, neon_env_builder, vanilla_pg):
     # Put data in vanilla pg
     vanilla_pg.start()
     vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser")
diff --git a/test_runner/regress/test_neon_local_cli.py b/test_runner/regress/test_neon_local_cli.py
index 46b72fbca500..8edba49b8a6f 100644
--- a/test_runner/regress/test_neon_local_cli.py
+++ b/test_runner/regress/test_neon_local_cli.py
@@ -59,3 +59,5 @@ def test_neon_two_primary_endpoints_fail(
     env.neon_cli.endpoint_stop("ep1")
     # ep1 is stopped so create ep2 will succeed
     env.neon_cli.endpoint_start("ep2")
+    # cleanup
+    env.neon_cli.endpoint_stop("ep2")
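The argument reordering in test_import.py works because pytest finalizes fixtures in reverse order of setup: requesting neon_env_builder before vanilla_pg means vanilla_pg is set up later, torn down earlier, and its postgres is already stopped by the time the leaked-process check in NeonEnvBuilder.__exit__ scans the test cgroup. A minimal, self-contained illustration of that ordering rule, with hypothetical fixture names unrelated to the Neon fixtures:

import pytest

ORDER = []

@pytest.fixture
def first():
    ORDER.append("setup first")
    yield
    ORDER.append("teardown first")

@pytest.fixture
def second():
    ORDER.append("setup second")
    yield
    ORDER.append("teardown second")

def test_fixture_order(first, second):
    # Setup follows the argument order of the test; teardown runs in reverse,
    # so the full sequence is: setup first, setup second, <test body>,
    # teardown second, teardown first.
    assert ORDER == ["setup first", "setup second"]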