Skip to content

Commit

Permalink
GH-38183: [CI][Python] Use pipx to install GCS testbench
Browse files Browse the repository at this point in the history
  • Loading branch information
pitrou committed Aug 27, 2024
1 parent b836662 commit 2148b25
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 61 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -474,9 +474,9 @@ jobs:
shell: bash
run: |
ci/scripts/install_gcs_testbench.sh default
echo "PYTHON_BIN_DIR=$(cygpath --windows $(dirname $(which python3.exe)))" >> $GITHUB_ENV
echo "TESTBENCH_BIN_DIR=$(cygpath --windows $(dirname $(which storage-testbench)))" >> $GITHUB_ENV
- name: Test
shell: msys2 {0}
run: |
PATH="$(cygpath --unix ${PYTHON_BIN_DIR}):${PATH}"
PATH="$(cygpath --unix ${TESTBENCH_BIN_DIR}):${PATH}"
ci/scripts/cpp_test.sh "$(pwd)" "$(pwd)/build"
12 changes: 7 additions & 5 deletions ci/docker/conda-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,19 @@ RUN mamba install -q -y \
valgrind && \
mamba clean --all

# We want to install the GCS testbench using the Conda base environment's Python,
# because the test environment's Python may later change.
ENV PIPX_PYTHON=/opt/conda/bin/python3
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to
# be on the path for the tests to run.
# be on the path for the tests to run.
ENV PATH=/opt/conda/envs/arrow/bin:$PATH

COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_azurite.sh

# We want to install the GCS testbench using the same Python binary that the Conda code will use.
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

Expand Down
5 changes: 0 additions & 5 deletions ci/docker/conda-python.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@ RUN mamba install -q -y \
nomkl && \
mamba clean --all

# XXX The GCS testbench was already installed in conda-cpp.dockerfile,
# but we changed the installed Python version above, so we need to reinstall it.
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

ENV ARROW_ACERO=ON \
ARROW_BUILD_STATIC=OFF \
ARROW_BUILD_TESTS=OFF \
Expand Down
6 changes: 4 additions & 2 deletions ci/scripts/install_gcs_testbench.bat
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@

@echo on

set GCS_TESTBENCH_VERSION="v0.36.0"
set GCS_TESTBENCH_VERSION="v0.40.0"

python -m pip install pipx || exit /B 1

@REM Install GCS testbench %GCS_TESTBENCH_VERSION%
python -m pip install ^
pipx install ^
"https://github.com/googleapis/storage-testbench/archive/%GCS_TESTBENCH_VERSION%.tar.gz" ^
|| exit /B 1
19 changes: 12 additions & 7 deletions ci/scripts/install_gcs_testbench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,24 @@ case "$(uname -m)" in
;;
esac

# On newer Pythons, installing into the system environment fails, so override that
export PIP_BREAK_SYSTEM_PACKAGES=1

version=$1
if [[ "${version}" -eq "default" ]]; then
version="v0.39.0"
# Latest versions of Testbench require newer setuptools
python3 -m pip install --upgrade setuptools
fi

export PIP_BREAK_SYSTEM_PACKAGES=1
python3 -m pip install pipx

# This script is run with PYTHON undefined in some places,
# but those only use older pythons.
if [[ -z "${PYTHON_VERSION}" ]] || [[ "${PYTHON_VERSION}" != "3.13" ]]; then
python3 -m pip install \
"https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
pipx_flags=--verbose
if [[ $(id -un) == "root" ]]; then
# Install globally as /root/.local/bin is typically not in $PATH
pipx_flags="${pipx_flags} --global"
fi
if [[ ! -z "${PIPX_PYTHON}" ]]; then
pipx_flags="${pipx_flags} --python ${PIPX_PYTHON}"
fi
pipx install ${pipx_flags} "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
fi
68 changes: 34 additions & 34 deletions cpp/src/arrow/filesystem/gcsfs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,52 +95,52 @@ class GcsTestbench : public ::testing::Environment {
if (const auto* env = std::getenv("PYTHON")) {
names = {env};
}
auto error = std::string(
"Could not start GCS emulator."
" Used the following list of python interpreter names:");
for (const auto& interpreter : names) {
auto exe_path = bp::search_path(interpreter);
error += " " + interpreter;
if (exe_path.empty()) {
error += " (exe not found)";
continue;
}
auto error = std::string("Could not start GCS emulator 'storage-testbench'");

bp::ipstream output;
server_process_ = bp::child(exe_path, "-m", "testbench", "--port", port_, group_,
bp::std_err > output);
auto testbench_is_running = [](bp::child& process, bp::ipstream& output) {
// Wait for message: "* Restarting with"
auto testbench_is_running = [&output, this](bp::child& process) {
std::string line;
std::chrono::time_point<std::chrono::steady_clock> end =
std::chrono::steady_clock::now() + std::chrono::seconds(10);
while (server_process_.valid() && server_process_.running() &&
std::chrono::steady_clock::now() < end) {
if (output.peek() && std::getline(output, line)) {
std::cerr << line << std::endl;
if (line.find("* Restarting with") != std::string::npos) return true;
} else {
std::this_thread::sleep_for(std::chrono::milliseconds(20));
}
std::string line;
std::chrono::time_point<std::chrono::steady_clock> end =
std::chrono::steady_clock::now() + std::chrono::seconds(10);
while (process.valid() && process.running() &&
std::chrono::steady_clock::now() < end) {
if (output.peek() && std::getline(output, line)) {
std::cerr << line << std::endl;
if (line.find("* Restarting with") != std::string::npos) return true;
} else {
std::this_thread::sleep_for(std::chrono::milliseconds(20));
}
return false;
};
}
return false;
};

if (testbench_is_running(server_process_)) break;
error += " (failed to start)";
server_process_.terminate();
server_process_.wait();
auto exe_path = bp::search_path("storage-testbench");
if (!exe_path.empty()) {
bp::ipstream output;
server_process_ =
bp::child(exe_path, "--port", port_, group_, bp::std_err > output);
if (!testbench_is_running(server_process_, output)) {
error += " (failed to start)";
server_process_.terminate();
server_process_.wait();
}
} else {
error += " (exe not found)";
}
if (!server_process_.valid()) {
error_ = std::move(error);
}
if (server_process_.valid() && server_process_.valid()) return;
error_ = std::move(error);
}

bool running() { return server_process_.running(); }

~GcsTestbench() override {
// Brutal shutdown, kill the full process group because the GCS testbench may launch
// additional children.
group_.terminate();
try {
group_.terminate();
} catch (bp::process_error&) {
}
if (server_process_.valid()) {
server_process_.wait();
}
Expand Down
7 changes: 3 additions & 4 deletions python/pyarrow/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,17 +233,16 @@ def minio_server_health_check(address):
def gcs_server():
port = find_free_port()
env = os.environ.copy()
args = [sys.executable, '-m', 'testbench', '--port', str(port)]
exe = 'storage-testbench'
args = [exe, '--port', str(port)]
proc = None
try:
# check first if testbench module is available
import testbench # noqa:F401
# start server
proc = subprocess.Popen(args, env=env)
# Make sure the server is alive.
if proc.poll() is not None:
pytest.skip(f"Command {args} did not start server successfully!")
except (ModuleNotFoundError, OSError) as e:
except OSError as e:
pytest.skip(f"Command {args} failed to execute: {e}")
else:
yield {
Expand Down
4 changes: 2 additions & 2 deletions r/tests/testthat/test-gcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,12 @@ test_that("GcsFileSystem$create() can read json_credentials", {
})

skip_on_cran()
skip_if_not(system('python -c "import testbench"') == 0, message = "googleapis-storage-testbench is not installed.")
skip_if_not(system('storage-testbench -h') == 0, message = "googleapis-storage-testbench is not installed.")
library(dplyr)

testbench_port <- Sys.getenv("TESTBENCH_PORT", "9001")

pid_minio <- sys::exec_background("python", c("-m", "testbench", "--port", testbench_port),
pid_minio <- sys::exec_background("storage-testbench", c("--port", testbench_port),
std_out = FALSE,
std_err = FALSE # TODO: is there a good place to send output?
)
Expand Down

0 comments on commit 2148b25

Please sign in to comment.