test(python): Reorganize benchmark test folder (pola-rs#6695)

Vincenthays · Feb 9, 2023 · 2dee8fa · 2dee8fa
1 parent 171e54f
commit 2dee8fa
Show file tree

Hide file tree

Showing 16 changed files with 335 additions and 196 deletions.
diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml
@@ -5,7 +5,7 @@ on:
     paths:
       - "polars/**"
       - "Cargo.toml"
-      - "py-polars/tests/db-benchmark/**"
+      - "py-polars/tests/benchmark/**"
       - ".github/workflows/benchmark.yaml"
 
 concurrency:
@@ -21,9 +21,9 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: "3.10"
-          cache: "pip"
-          cache-dependency-path: "py-polars/requirements-dev.txt"
+          python-version: '3.11'
+          cache: pip
+          cache-dependency-path: py-polars/requirements-dev.txt
 
       - name: Create virtual environment
         working-directory: py-polars
@@ -43,9 +43,9 @@ jobs:
           r-version: "3.5.3"
 
       - name: Generate data
-        working-directory: py-polars/tests/db-benchmark
+        working-directory: py-polars/tests/benchmark
         run: |
-          Rscript -e 'install.packages("data.table",repos = "https://cloud.r-project.org")'
+          Rscript -e 'install.packages("data.table", repos="https://cloud.r-project.org")'
           Rscript groupby-datagen.R 1e7 1e2 5 0
 
       - name: Set up Rust
@@ -58,26 +58,22 @@ jobs:
         with:
           workspaces: py-polars
 
-      - name: Install Polars
+      - name: Install Polars release build
         env:
           RUSTFLAGS: -C embed-bitcode
         working-directory: py-polars
         run: |
           source activate
           maturin develop --release -- -C codegen-units=8 -C lto=thin -C target-cpu=native
 
-      - name: Run benchmark tests - lazy versus eager
-        working-directory: py-polars/tests/db-benchmark
-        run: python lazy_vs_eager.py
+      - name: Run H2O AI database benchmark - on strings
+        working-directory: py-polars/tests/benchmark
+        run: python run_h2oai_benchmark.py on_strings
 
-      - name: Run benchmark tests - various
-        working-directory: py-polars/tests/db-benchmark
-        run: python various.py
+      - name: Run H2O AI database benchmark - on categoricals
+        working-directory: py-polars/tests/benchmark
+        run: python run_h2oai_benchmark.py
 
-      - name: Run benchmark tests - on strings
-        working-directory: py-polars/tests/db-benchmark
-        run: python main.py on_strings
-
-      - name: Run benchmark tests - on categoricals
-        working-directory: py-polars/tests/db-benchmark
-        run: python main.py
+      - name: Run various benchmark tests
+        working-directory: py-polars
+        run: pytest -m benchmark --durations 0 -v
diff --git a/.github/workflows/test-python.yaml b/.github/workflows/test-python.yaml
@@ -65,10 +65,10 @@ jobs:
           maturin develop
 
       - name: Run tests and report coverage
-        run: pytest --cov -n auto -m "slow or not slow"
+        run: pytest --cov -n auto -m "not benchmark"
 
       - name: Run doctests
-        run: python tests/docs/run_doc_examples.py
+        run: python tests/docs/run_doctest.py
 
       - name: Check import without optional dependencies
         run: |
@@ -129,7 +129,7 @@ jobs:
           pip install target/wheels/polars-*.whl
 
       - name: Run tests
-        run: pytest -n auto -m "slow or not slow"
+        run: pytest -n auto -m "not benchmark"
 
       - name: Check import without optional dependencies
         run: |

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -117,7 +117,7 @@ Create a new git branch from the `master` branch in your local repository, and s
 The Rust codebase is located in the `polars` directory, while the Python codebase is located in the `py-polars` directory.
 Both directories contain a `Makefile` with helpful commands. Most notably:
 
-- `make test` to run the test suite
+- `make test` to run the test suite (see the [test suite docs](/py-polars/tests/README.md) for more info)
 - `make pre-commit` to run autoformatting and linting
 
 Note that your work cannot be merged if these checks fail!

diff --git a/py-polars/Makefile b/py-polars/Makefile
@@ -46,16 +46,16 @@ test: venv build  ## Run fast unittests
 
 .PHONY: doctest
 doctest: venv build  ## Run doctests
-	$(VENV_BIN)/python tests/docs/run_doc_examples.py
+	$(VENV_BIN)/python tests/docs/run_doctest.py
 
 .PHONY: test-all
 test-all: venv build  ## Run all tests
 	$(VENV_BIN)/pytest -n auto -m "slow or not slow"
-	$(VENV_BIN)/python tests/docs/run_doc_examples.py
+	$(VENV_BIN)/python tests/docs/run_doctest.py
 
 .PHONY: coverage
 coverage: venv build  ## Run tests and report coverage
-	$(VENV_BIN)/pytest --cov -n auto -m "slow or not slow"
+	$(VENV_BIN)/pytest --cov -n auto -m "not benchmark"
 
 .PHONY: clean
 clean:  ## Clean up caches and build artifacts

diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml
@@ -95,31 +95,6 @@ module = ["polars.*"]
 # thin wrapper around the PyO3 api to start with.
 warn_return_any = false
 
-[tool.pytest.ini_options]
-addopts = [
-  "--import-mode=importlib",
-  "-m not slow and not hypothesis", # Default to running fast tests only. To run ALL tests, run: pytest -m ""
-]
-markers = [
-  "slow: Tests with a longer than average runtime.",
-]
-
-[tool.coverage.run]
-source = ["polars"]
-branch = true
-
-[tool.coverage.report]
-fail_under = 85
-skip_covered = true
-show_missing = true
-exclude_lines = [
-  "pragma: no cover",
-  "@overload",
-  "except ImportError",
-  "if TYPE_CHECKING:",
-  "from typing_extensions import ",
-]
-
 [tool.ruff]
 target-version = "py37"
 line-length = 88
@@ -178,7 +153,31 @@ ban-relative-imports = "all"
 
 [tool.ruff.per-file-ignores]
 "polars/datatypes.py" = ["B019"]
-"tests/*/*.py" = ["D100", "D103"]
-"tests/docs/run_doc_examples.py" = ["D101", "D102", "D103"]
-"tests/parametric/__init__.py" = ["D104"]
-"tests/slow/__init__.py" = ["D104"]
+"tests/**/*.py" = ["D100", "D103"]
+
+[tool.pytest.ini_options]
+addopts = [
+  "--import-mode=importlib",
+  # Default to running fast tests only. To run ALL tests, run: pytest -m ""
+  "-m not slow and not hypothesis and not benchmark",
+]
+markers = [
+  "slow: Tests with a longer than average runtime.",
+  "benchmark: Tests that should be run on a Polars release build.",
+]
+
+[tool.coverage.run]
+source = ["polars"]
+branch = true
+
+[tool.coverage.report]
+fail_under = 85
+skip_covered = true
+show_missing = true
+exclude_lines = [
+  "pragma: no cover",
+  "@overload",
+  "except ImportError",
+  "if TYPE_CHECKING:",
+  "from typing_extensions import ",
+]
diff --git a/py-polars/tests/README.md b/py-polars/tests/README.md
@@ -0,0 +1,117 @@
+# Polars test suite
+
+This folder contains the main Polars test suite. This document contains some information on the various components of the test suite, as well as guidelines for writing new tests.
+
+The test suite contains four main components, each confined to its own folder: unit tests, parametric tests, doctests, and benchmark tests.
+
+Note that this test suite is indirectly responsible for testing Rust Polars as well. The Rust test suite is kept small to reduce compilation times. A lot of the Rust functionality is tested here instead.
+
+## Table of contents
+
+- [Unit tests](#unit-tests)
+- [Parametric tests](#parametric-tests)
+- [Doctests](#doctests)
+- [Benchmark tests](#benchmark-tests)
+
+## Unit tests
+
+The `unit` folder contains all regular unit tests.
+These tests are intended to make sure all Polars functionality works as intended.
+
+### Running unit tests
+
+Run unit tests by running `make test` from the `py-polars` folder. This will compile the Rust bindings and then run the unit tests.
+
+If you're working in the Python code only, you can avoid recompiling every time by simply running `pytest` instead.
+
+By default, slow tests are skipped. Slow tests are marked as such using a [custom pytest marker](https://docs.pytest.org/en/latest/example/markers.html).
+If you wish to run slow tests, run `pytest -m slow`.
+Or run `pytest -m ""` to run _all_ tests, regardless of marker.
+
+Tests can be run in parallel using [`pytest-xdist`](https://pytest-xdist.readthedocs.io/en/latest/). Run `pytest -n auto` to parallelize your test run.
+
+### Writing unit tests
+
+Whenever you add new functionality, you should also add matching unit tests.
+Add your tests to the appropriate test module in the `unit` folder.
+Some guidelines to keep in mind:
+
+- Try to fully cover all possible inputs and edge cases you can think of.
+- Utilize pytest tools like [`fixture`](https://docs.pytest.org/en/latest/explanation/fixtures.html) and [`parametrize`](https://docs.pytest.org/en/latest/how-to/parametrize.html) where appropriate.
+- Since many tests will require some data to be defined first, it can be efficient to run multiple checks in a single test. This can also be addressed using pytest fixtures.
+- Unit tests should not depend on external factors, otherwise test parallelization will break.
+
+## Parametric tests
+
+The `parametric` folder contains parametric tests written using the [Hypothesis](https://hypothesis.readthedocs.io/) framework.
+These tests are intended to find and test edge cases by generating many random datapoints.
+
+### Running parametric tests
+
+Run parametric tests by running `pytest -m hypothesis`.
+
+Note that parametric tests are excluded by default when running `pytest`.
+You must explicitly specify `-m hypothesis` to run them.
+
+These tests _will_ be included when calculating test coverage, and will also be run as part of the `make test-all` command.
+
+## Doctests
+
+The `docs` folder contains a script for running [`doctest`](https://docs.python.org/3/library/doctest.html).
+This folder does not contain any actual tests - rather, the script checks all docstrings in the Polars package for `Examples` sections, runs the code examples, and verifies the output.
+
+The aim of running `doctest` is to make sure the `Examples` sections in our docstrings are valid and remain up-to-date with code changes.
+
+### Running `doctest`
+
+To run the `doctest` module, run `make doctest` from the `py-polars` folder.
+You can also run the script directly from your virtual environment.
+
+Note that doctests are _not_ run using pytest. While pytest does have the capability to run doc examples, configuration options are too limited for our purposes.
+
+Doctests will _not_ count towards test coverage. They are not a substitute for unit tests; rather, they are meant to convey the intended use of the Polars API to the user.
+
+### Writing doc examples
+
+Almost all classes/methods/functions that are part of Polars' public API should include code examples in their docstring.
+These examples help users understand basic usage and allow us to illustrate more advanced concepts as well.
+Some guidelines for writing a good docstring `Examples` section:
+
+- Start with a minimal example that showcases the default functionality.
+- Showcase the effect of its parameters.
+- Showcase any special interactions when combined with other code.
+- Keep it succinct and avoid multiple examples showcasing the same thing.
+
+There are many great docstring examples already, just check other code if you need inspiration!
+
+In addition to the [regular options](https://docs.python.org/3/library/doctest.html#option-flags) available when writing doctests, the script configuration allows for a new `IGNORE_RESULT` directive. Use this directive if you want to ensure the code runs, but the output may be random by design or not interesting to check.
+
+```python
+>>> df.sample(n=2)  # doctest: +IGNORE_RESULT
+```
+
+## Benchmark tests
+
+The `benchmark` folder contains code for running the [H2O AI database benchmark](https://github.com/h2oai/db-benchmark).
+It also contains various other benchmark tests.
+
+The aim of this part of the test suite is to spot performance regressions in the code, and to verify that Polars functionality works as expected when run on a release build or at a larger scale.
+
+### Running the H2O AI database benchmark
+
+The benchmark is somewhat cumbersome to run locally. You must first generate the dataset using the R script provided in the `benchmark` folder. Afterwards, you can simply run the Python script to run the benchmark.
+
+Make sure to install a release build of Polars before running the benchmark to guarantee the best results.
+
+Refer to the [benchmark workflow](/.github/workflows/benchmark.yaml) for detailed steps.
+
+### Running other benchmark tests
+
+The other benchmark tests are run using pytest.
+Run `pytest -m benchmark --durations 0 -v` to run these tests and report run duration.
+
+Note that benchmark tests are excluded by default when running `pytest`.
+You must explicitly specify `-m benchmark` to run them.
+They will also be excluded when calculating test coverage.
+
+These tests _will_ be run as part of the `make test-all` command.
diff --git a/py-polars/tests/benchmark/__init__.py b/py-polars/tests/benchmark/__init__.py
diff --git a/...lars/tests/db-benchmark/groupby-datagen.R → py-polars/tests/benchmark/groupby-datagen.R b/...lars/tests/db-benchmark/groupby-datagen.R → py-polars/tests/benchmark/groupby-datagen.R
diff --git a/py-polars/tests/db-benchmark/main.py → ...rs/tests/benchmark/run_h2oai_benchmark.py b/py-polars/tests/db-benchmark/main.py → ...rs/tests/benchmark/run_h2oai_benchmark.py
@@ -1,4 +1,13 @@
-# type: ignore
+"""
+Contains code for running the H2O AI database benchmark.
+
+First, run the R script to generate a dataset with set characteristics.
+Then run this script to get the runtime of certain queries.
+
+See:
+https://h2oai.github.io/db-benchmark/
+
+"""
 
 import sys
 import time
@@ -9,7 +18,7 @@
 
 print(pl.__version__)
 
-x = pl.read_csv(
+df = pl.read_csv(
     "G1_1e7_1e2_5_0.csv",
     dtypes={
         "id4": pl.Int32,
@@ -23,8 +32,8 @@
 ON_STRINGS = sys.argv.pop() == "on_strings"
 
 if not ON_STRINGS:
-    x = x.with_columns([pl.col(["id1", "id2", "id3"]).cast(pl.Categorical)])
-df = x.clone()
+    df = df.with_columns([pl.col(["id1", "id2", "id3"]).cast(pl.Categorical)])
+df = df.clone()
 x = df.lazy()
 
 t00 = time.time()
@@ -310,6 +319,8 @@
 assert out["id6"].to_list() == [2137755425]
 assert np.isclose(out["v3"].to_list(), 4.7040828499563754e8).all()
 
-if not ON_STRINGS and total_time > 12:
-    print("query took longer than 12s, may be noise")
+# Fail or pass the benchmark based on runtime. Used in the Benchmark CI workflow.
+threshold = 18 if ON_STRINGS else 12
+if total_time > threshold:
+    print(f"query took longer than {threshold}s, may be noise")
     exit(1)