From 2dee8fad33c306c9fe34f1c0540149f76c43d424 Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer
Date: Thu, 9 Feb 2023 09:55:09 +0100
Subject: [PATCH] test(python): Reorganize benchmark test folder (#6695)

---
 .github/workflows/benchmark.yaml              |  36 ++---
 .github/workflows/test-python.yaml            |   6 +-
 CONTRIBUTING.md                               |   2 +-
 py-polars/Makefile                            |   6 +-
 py-polars/pyproject.toml                      |  57 ++++---
 py-polars/tests/README.md                     | 117 ++++++++++++++
 py-polars/tests/benchmark/__init__.py         |   0
 .../groupby-datagen.R                         |   0
 .../run_h2oai_benchmark.py}                   |  23 ++-
 py-polars/tests/benchmark/test_release.py     | 148 ++++++++++++++++++
 py-polars/tests/db-benchmark/lazy_vs_eager.py |   9 --
 py-polars/tests/db-benchmark/various.py       | 124 ---------------
 py-polars/tests/docs/__init__.py              |   0
 .../{run_doc_examples.py => run_doctest.py}   |   0
 py-polars/tests/parametric/test_testing.py    |   1 +
 py-polars/tests/unit/io/conftest.py           |   2 +-
 16 files changed, 335 insertions(+), 196 deletions(-)
 create mode 100644 py-polars/tests/README.md
 create mode 100644 py-polars/tests/benchmark/__init__.py
 rename py-polars/tests/{db-benchmark => benchmark}/groupby-datagen.R (100%)
 rename py-polars/tests/{db-benchmark/main.py => benchmark/run_h2oai_benchmark.py} (92%)
 create mode 100644 py-polars/tests/benchmark/test_release.py
 delete mode 100644 py-polars/tests/db-benchmark/lazy_vs_eager.py
 delete mode 100644 py-polars/tests/db-benchmark/various.py
 create mode 100644 py-polars/tests/docs/__init__.py
 rename py-polars/tests/docs/{run_doc_examples.py => run_doctest.py} (100%)

diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml
index 3605f5324eba3..576e18e1419c1 100644
--- a/.github/workflows/benchmark.yaml
+++ b/.github/workflows/benchmark.yaml
@@ -5,7 +5,7 @@ on:
     paths:
       - "polars/**"
      - "Cargo.toml"
-      - "py-polars/tests/db-benchmark/**"
+      - "py-polars/tests/benchmark/**"
      - ".github/workflows/benchmark.yaml"
 
 concurrency:
@@ -21,9 +21,9 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
-          python-version: "3.10"
-          cache: "pip"
-          cache-dependency-path: "py-polars/requirements-dev.txt"
+          python-version: '3.11'
+          cache: pip
+          cache-dependency-path: py-polars/requirements-dev.txt
 
      - name: Create virtual environment
        working-directory: py-polars
@@ -43,9 +43,9 @@ jobs:
          r-version: "3.5.3"
 
      - name: Generate data
-        working-directory: py-polars/tests/db-benchmark
+        working-directory: py-polars/tests/benchmark
        run: |
-          Rscript -e 'install.packages("data.table",repos = "https://cloud.r-project.org")'
+          Rscript -e 'install.packages("data.table", repos="https://cloud.r-project.org")'
          Rscript groupby-datagen.R 1e7 1e2 5 0
 
      - name: Set up Rust
@@ -58,7 +58,7 @@ jobs:
        with:
          workspaces: py-polars
 
-      - name: Install Polars
+      - name: Install Polars release build
        env:
          RUSTFLAGS: -C embed-bitcode
        working-directory: py-polars
@@ -66,18 +66,14 @@ jobs:
          source activate
          maturin develop --release -- -C codegen-units=8 -C lto=thin -C target-cpu=native
 
-      - name: Run benchmark tests - lazy versus eager
-        working-directory: py-polars/tests/db-benchmark
-        run: python lazy_vs_eager.py
+      - name: Run H2O AI database benchmark - on strings
+        working-directory: py-polars/tests/benchmark
+        run: python run_h2oai_benchmark.py on_strings
 
-      - name: Run benchmark tests - various
-        working-directory: py-polars/tests/db-benchmark
-        run: python various.py
+      - name: Run H2O AI database benchmark - on categoricals
+        working-directory: py-polars/tests/benchmark
+        run: python run_h2oai_benchmark.py
 
-      - name: Run benchmark tests - on strings
-        working-directory: py-polars/tests/db-benchmark
-        run: python main.py on_strings
-
-      - name: Run benchmark tests - on categoricals
-        working-directory: py-polars/tests/db-benchmark
-        run: python main.py
+      - name: Run various benchmark tests
+        working-directory: py-polars
+        run: pytest -m benchmark --durations 0 -v
diff --git a/.github/workflows/test-python.yaml b/.github/workflows/test-python.yaml
index 3c10b57924ffd..13f944a882da9 100644
--- a/.github/workflows/test-python.yaml
+++ b/.github/workflows/test-python.yaml
@@ -65,10 +65,10 @@ jobs:
          maturin develop
 
      - name: Run tests and report coverage
-        run: pytest --cov -n auto -m "slow or not slow"
+        run: pytest --cov -n auto -m "not benchmark"
 
      - name: Run doctests
-        run: python tests/docs/run_doc_examples.py
+        run: python tests/docs/run_doctest.py
 
      - name: Check import without optional dependencies
        run: |
@@ -129,7 +129,7 @@ jobs:
          pip install target/wheels/polars-*.whl
 
      - name: Run tests
-        run: pytest -n auto -m "slow or not slow"
+        run: pytest -n auto -m "not benchmark"
 
      - name: Check import without optional dependencies
        run: |
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 97867339ec556..92726d2dfa3d9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -117,7 +117,7 @@ Create a new git branch from the `master` branch in your local repository, and s
 The Rust codebase is located in the `polars` directory, while the Python codebase is located in the `py-polars` directory.
 Both directories contain a `Makefile` with helpful commands. Most notably:
 
-- `make test` to run the test suite
+- `make test` to run the test suite (see the [test suite docs](/py-polars/tests/README.md) for more info)
 - `make pre-commit` to run autoformatting and linting
 
 Note that your work cannot be merged if these checks fail!
diff --git a/py-polars/Makefile b/py-polars/Makefile
index 6678e01fe52bb..3fe607eedcc3a 100644
--- a/py-polars/Makefile
+++ b/py-polars/Makefile
@@ -46,16 +46,16 @@ test: venv build ## Run fast unittests
 
 .PHONY: doctest
 doctest: venv build ## Run doctests
-	$(VENV_BIN)/python tests/docs/run_doc_examples.py
+	$(VENV_BIN)/python tests/docs/run_doctest.py
 
 .PHONY: test-all
 test-all: venv build ## Run all tests
 	$(VENV_BIN)/pytest -n auto -m "slow or not slow"
-	$(VENV_BIN)/python tests/docs/run_doc_examples.py
+	$(VENV_BIN)/python tests/docs/run_doctest.py
 
 .PHONY: coverage
 coverage: venv build ## Run tests and report coverage
-	$(VENV_BIN)/pytest --cov -n auto -m "slow or not slow"
+	$(VENV_BIN)/pytest --cov -n auto -m "not benchmark"
 
 .PHONY: clean
 clean: ## Clean up caches and build artifacts
diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml
index 6f7adda985c67..b973529a0e8ca 100644
--- a/py-polars/pyproject.toml
+++ b/py-polars/pyproject.toml
@@ -95,31 +95,6 @@ module = ["polars.*"]
 # thin wrapper around the PyO3 api to start with.
 warn_return_any = false
 
-[tool.pytest.ini_options]
-addopts = [
-    "--import-mode=importlib",
-    "-m not slow and not hypothesis",  # Default to running fast tests only. To run ALL tests, run: pytest -m ""
-]
-markers = [
-    "slow: Tests with a longer than average runtime.",
-]
-
-[tool.coverage.run]
-source = ["polars"]
-branch = true
-
-[tool.coverage.report]
-fail_under = 85
-skip_covered = true
-show_missing = true
-exclude_lines = [
-    "pragma: no cover",
-    "@overload",
-    "except ImportError",
-    "if TYPE_CHECKING:",
-    "from typing_extensions import ",
-]
-
 [tool.ruff]
 target-version = "py37"
 line-length = 88
@@ -178,7 +153,31 @@ ban-relative-imports = "all"
 
 [tool.ruff.per-file-ignores]
 "polars/datatypes.py" = ["B019"]
-"tests/*/*.py" = ["D100", "D103"]
-"tests/docs/run_doc_examples.py" = ["D101", "D102", "D103"]
-"tests/parametric/__init__.py" = ["D104"]
-"tests/slow/__init__.py" = ["D104"]
+"tests/**/*.py" = ["D100", "D103"]
+
+[tool.pytest.ini_options]
+addopts = [
+    "--import-mode=importlib",
+    # Default to running fast tests only. To run ALL tests, run: pytest -m ""
+    "-m not slow and not hypothesis and not benchmark",
+]
+markers = [
+    "slow: Tests with a longer than average runtime.",
+    "benchmark: Tests that should be run on a Polars release build.",
+]
+
+[tool.coverage.run]
+source = ["polars"]
+branch = true
+
+[tool.coverage.report]
+fail_under = 85
+skip_covered = true
+show_missing = true
+exclude_lines = [
+    "pragma: no cover",
+    "@overload",
+    "except ImportError",
+    "if TYPE_CHECKING:",
+    "from typing_extensions import ",
+]
diff --git a/py-polars/tests/README.md b/py-polars/tests/README.md
new file mode 100644
index 0000000000000..30286980ff196
--- /dev/null
+++ b/py-polars/tests/README.md
@@ -0,0 +1,117 @@
+# Polars test suite
+
+This folder contains the main Polars test suite. This document contains some information on the various components of the test suite, as well as guidelines for writing new tests.
+
+The test suite contains four main components, each confined to its own folder: unit tests, parametric tests, benchmark tests, and doctests.
+
+Note that this test suite is indirectly responsible for testing Rust Polars as well. The Rust test suite is kept small to reduce compilation times. A lot of the Rust functionality is tested here instead.
+
+## Table of contents
+
+- [Unit tests](#unit-tests)
+- [Parametric tests](#parametric-tests)
+- [Doctests](#doctests)
+- [Benchmark tests](#benchmark-tests)
+
+## Unit tests
+
+The `unit` folder contains all regular unit tests.
+These tests make sure all Polars functionality works as intended.
+
+### Running unit tests
+
+Run unit tests by running `make test` from the `py-polars` folder. This will compile the Rust bindings and then run the unit tests.
+
+If you are only working on the Python code, you can avoid recompiling every time by simply running `pytest` instead.
+
+By default, slow tests are skipped. Slow tests are marked as such using a [custom pytest marker](https://docs.pytest.org/en/latest/example/markers.html).
+If you wish to run slow tests, run `pytest -m slow`.
+Or run `pytest -m ""` to run _all_ tests, regardless of marker.
+
+Tests can be run in parallel using [`pytest-xdist`](https://pytest-xdist.readthedocs.io/en/latest/). Run `pytest -n auto` to parallelize your test run.
+
+### Writing unit tests
+
+Whenever you add new functionality, you should also add matching unit tests.
+Add your tests to the appropriate test module in the `unit` folder.
+Some guidelines to keep in mind:
+
+- Try to fully cover all possible inputs and edge cases you can think of.
+- Utilize pytest tools like [`fixture`](https://docs.pytest.org/en/latest/explanation/fixtures.html) and [`parametrize`](https://docs.pytest.org/en/latest/how-to/parametrize.html) where appropriate (see the example after this list).
+- Since many tests will require some data to be defined first, it can be efficient to run multiple checks in a single test. This can also be addressed using pytest fixtures.
+- Unit tests should not depend on external factors, otherwise test parallelization will break.
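+
+For example, a parametrized unit test might look something like the sketch below (the test and the chosen cases are illustrative only, not an existing test in the suite):
+
+```python
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+
+@pytest.mark.parametrize(
+    ("values", "expected_sum"),
+    [
+        ([1, 2, 3], 6),
+        ([0, 0, 0], 0),
+        ([-1, 1], 0),
+    ],
+)
+def test_series_sum(values: list[int], expected_sum: int) -> None:
+    # One test function covers several inputs, including edge cases.
+    assert pl.Series("a", values).sum() == expected_sum
+```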
+
+## Parametric tests
+
+The `parametric` folder contains parametric tests written using the [Hypothesis](https://hypothesis.readthedocs.io/) framework.
+These tests are intended to find and test edge cases by generating many random datapoints.
+
+### Running parametric tests
+
+Run parametric tests by running `pytest -m hypothesis`.
+
+Note that parametric tests are excluded by default when running `pytest`.
+You must explicitly specify `-m hypothesis` to run them.
+
+These tests _will_ be included when calculating test coverage, and will also be run as part of the `make test-all` command.
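+
+As a purely illustrative sketch of what such a test can look like (the test name is made up, and a plain Hypothesis strategy is used rather than a Polars-specific one):
+
+```python
+from __future__ import annotations
+
+from hypothesis import given
+from hypothesis import strategies as st
+
+import polars as pl
+
+
+@given(st.lists(st.integers(min_value=-1000, max_value=1000), min_size=1))
+def test_series_sum_matches_python_sum(values: list[int]) -> None:
+    # Hypothesis generates many random inputs, hunting for edge cases.
+    assert pl.Series("a", values).sum() == sum(values)
+```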
+
+## Doctests
+
+The `docs` folder contains a script for running [`doctest`](https://docs.python.org/3/library/doctest.html).
+This folder does not contain any actual tests - rather, the script checks all docstrings in the Polars package for `Examples` sections, runs the code examples, and verifies the output.
+
+The aim of running `doctest` is to make sure the `Examples` sections in our docstrings are valid and remain up-to-date with code changes.
+
+### Running `doctest`
+
+To run the `doctest` module, run `make doctest` from the `py-polars` folder.
+You can also run the script directly from your virtual environment.
+
+Note that doctests are _not_ run using pytest. While pytest does have the capability to run doc examples, configuration options are too limited for our purposes.
+
+Doctests will _not_ count towards test coverage. They are not a substitute for unit tests, but are rather intended to show users how the Polars API is meant to be used.
+
+### Writing doc examples
+
+Almost all classes/methods/functions that are part of Polars' public API should include code examples in their docstring.
+These examples help users understand basic usage and allow us to illustrate more advanced concepts as well.
+Some guidelines for writing a good docstring `Examples` section:
+
+- Start with a minimal example that showcases the default functionality.
+- Showcase the effect of its parameters.
+- Showcase any special interactions when combined with other code.
+- Keep it succinct and avoid multiple examples showcasing the same thing.
+
+There are many great docstring examples already; just check other code if you need inspiration!
+
+In addition to the [regular options](https://docs.python.org/3/library/doctest.html#option-flags) available when writing doctests, the script configuration allows for a new `IGNORE_RESULT` directive. Use this directive if you want to ensure the code runs, but the output may be random by design or not interesting to check.
+
+```python
+>>> df.sample(n=2)  # doctest: +IGNORE_RESULT
+```
+
+## Benchmark tests
+
+The `benchmark` folder contains code for running the [H2O AI database benchmark](https://github.com/h2oai/db-benchmark).
+It also contains various other benchmark tests.
+
+The aim of this part of the test suite is to spot performance regressions in the code, and to verify that Polars functionality works as expected when run on a release build or at a larger scale.
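+
+For illustration, a benchmark test is an ordinary pytest test that carries the `benchmark` marker; the test below is a made-up sketch, not one of the tests in this folder:
+
+```python
+import pytest
+
+import polars as pl
+
+
+@pytest.mark.benchmark()
+def test_sort_large_frame() -> None:
+    # Only selected when running: pytest -m benchmark
+    df = pl.DataFrame({"x": list(range(1_000_000, 0, -1))})
+    assert df.sort("x").height == 1_000_000
+```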
+
+### Running the H2O AI database benchmark
+
+The benchmark is somewhat cumbersome to run locally. You must first generate the dataset using the R script provided in the `benchmark` folder. Afterwards, run the Python script to execute the benchmark.
+
+Make sure to install a release build of Polars before running the benchmark to guarantee the best results.
+
+Refer to the [benchmark workflow](/.github/workflows/benchmark.yaml) for detailed steps.
+
+### Running other benchmark tests
+
+The other benchmark tests are run using pytest.
+Run `pytest -m benchmark --durations 0 -v` to run these tests and report run duration.
+
+Note that benchmark tests are excluded by default when running `pytest`.
+You must explicitly specify `-m benchmark` to run them.
+They will also be excluded when calculating test coverage.
+
+These tests _will_ be run as part of the `make test-all` command.
diff --git a/py-polars/tests/benchmark/__init__.py b/py-polars/tests/benchmark/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/py-polars/tests/db-benchmark/groupby-datagen.R b/py-polars/tests/benchmark/groupby-datagen.R
similarity index 100%
rename from py-polars/tests/db-benchmark/groupby-datagen.R
rename to py-polars/tests/benchmark/groupby-datagen.R
diff --git a/py-polars/tests/db-benchmark/main.py b/py-polars/tests/benchmark/run_h2oai_benchmark.py
similarity index 92%
rename from py-polars/tests/db-benchmark/main.py
rename to py-polars/tests/benchmark/run_h2oai_benchmark.py
index 710b1e7f5ab78..f691ef9cda5e6 100644
--- a/py-polars/tests/db-benchmark/main.py
+++ b/py-polars/tests/benchmark/run_h2oai_benchmark.py
@@ -1,4 +1,13 @@
-# type: ignore
+"""
+Contains code for running the H2O AI database benchmark.
+
+First, run the R script to generate a dataset with set characteristics.
+Then run this script to get the runtime of certain queries.
+
+See:
+https://h2oai.github.io/db-benchmark/
+
+"""
 import sys
 import time
 
@@ -9,7 +18,7 @@
 
 print(pl.__version__)
 
-x = pl.read_csv(
+df = pl.read_csv(
     "G1_1e7_1e2_5_0.csv",
     dtypes={
         "id4": pl.Int32,
@@ -23,8 +32,8 @@
 ON_STRINGS = sys.argv.pop() == "on_strings"
 
 if not ON_STRINGS:
-    x = x.with_columns([pl.col(["id1", "id2", "id3"]).cast(pl.Categorical)])
-df = x.clone()
+    df = df.with_columns([pl.col(["id1", "id2", "id3"]).cast(pl.Categorical)])
+df = df.clone()
 x = df.lazy()
 
 t00 = time.time()
@@ -310,6 +319,8 @@
 assert out["id6"].to_list() == [2137755425]
 assert np.isclose(out["v3"].to_list(), 4.7040828499563754e8).all()
 
-if not ON_STRINGS and total_time > 12:
-    print("query took longer than 12s, may be noise")
+# Fail or pass the benchmark based on runtime. Used in the Benchmark CI workflow.
+threshold = 18 if ON_STRINGS else 12
+if total_time > threshold:
+    print(f"query took longer than {threshold}s, may be noise")
     exit(1)
diff --git a/py-polars/tests/benchmark/test_release.py b/py-polars/tests/benchmark/test_release.py
new file mode 100644
index 0000000000000..21c68b12ea516
--- /dev/null
+++ b/py-polars/tests/benchmark/test_release.py
@@ -0,0 +1,148 @@
+"""
+Various benchmark tests.
+
+Tests in this module will be run in the CI using a release build of Polars.
+
+To run these tests: pytest -m benchmark
+"""
+import os
+import time
+from pathlib import Path
+from typing import cast
+
+import numpy as np
+import pytest
+
+import polars as pl
+from polars.testing import assert_frame_equal
+
+# Mark all tests in this module as benchmark tests
+pytestmark = pytest.mark.benchmark
+
+
+@pytest.mark.skipif(
+    not (Path(os.path.dirname(__file__)) / "G1_1e7_1e2_5_0.csv").is_file(),
+    reason="Dataset must be generated before running this test.",
+)
+def test_read_scan_large_csv() -> None:
+    filename = "G1_1e7_1e2_5_0.csv"
+    path = Path(os.path.dirname(__file__)) / filename
+
+    predicate = pl.col("v2") < 5
+
+    shape_eager = pl.read_csv(path).filter(predicate).shape
+    shape_lazy = (pl.scan_csv(path).filter(predicate)).collect().shape
+
+    assert shape_lazy == shape_eager
+
+
+def test_sort_nan_1942() -> None:
+    # https://github.com/pola-rs/polars/issues/1942
+    t0 = time.time()
+    pl.repeat(float("nan"), 2 << 12, eager=True).sort()
+    assert (time.time() - t0) < 1
+
+
+def test_mean_overflow() -> None:
+    np.random.seed(1)
+    expected = 769.5607652
+
+    df = pl.DataFrame(np.random.randint(500, 1040, 5000000), schema=["value"])
+
+    result = df.with_columns(pl.mean("value"))[0, 0]
+    assert np.isclose(result, expected)
+
+    result = df.with_columns(pl.col("value").cast(pl.Int32)).with_columns(
+        pl.mean("value")
+    )[0, 0]
+    assert np.isclose(result, expected)
+
+    result = df.with_columns(pl.col("value").cast(pl.Int32)).get_column("value").mean()
+    assert np.isclose(result, expected)
+
+
+def test_min_max_2850() -> None:
+    # https://github.com/pola-rs/polars/issues/2850
+    df = pl.DataFrame(
+        {
+            "id": [
+                130352432,
+                130352277,
+                130352611,
+                130352833,
+                130352305,
+                130352258,
+                130352764,
+                130352475,
+                130352368,
+                130352346,
+            ]
+        }
+    )
+
+    minimum = 130352258
+    maximum = 130352833.0
+
+    for _ in range(10):
+        permuted = df.sample(frac=1.0, seed=0)
+        computed = permuted.select(
+            [pl.col("id").min().alias("min"), pl.col("id").max().alias("max")]
+        )
+        assert cast(int, computed[0, "min"]) == minimum
+        assert cast(float, computed[0, "max"]) == maximum
+
+
+def test_windows_not_cached() -> None:
+    ldf = (
+        pl.DataFrame(
+            [
+                pl.Series("key", ["a", "a", "b", "b"]),
+                pl.Series("val", [2, 2, 1, 3]),
+            ]
+        )
+        .lazy()
+        .filter(
+            (pl.col("key").cumcount().over("key") == 0)
+            | (pl.col("val").shift(1).over("key").is_not_null())
+            | (pl.col("val") != pl.col("val").shift(1).over("key"))
+        )
+    )
+    # this might fail if they are cached
+    for _ in range(1000):
+        ldf.collect()
+
+
+def test_cross_join() -> None:
+    # triggers > 100 rows implementation
+    # https://github.com/pola-rs/polars/blob/5f5acb2a523ce01bc710768b396762b8e69a9e07/polars/polars-core/src/frame/cross_join.rs#L34
+    df1 = pl.DataFrame({"col1": ["a"], "col2": ["d"]})
+    df2 = pl.DataFrame({"frame2": pl.arange(0, 100, eager=True)})
+    out = df2.join(df1, how="cross")
+    df2 = pl.DataFrame({"frame2": pl.arange(0, 101, eager=True)})
+    assert_frame_equal(df2.join(df1, how="cross").slice(0, 100), out)
+
+
+def test_cross_join_slice_pushdown() -> None:
+    # this will likely go out of memory if we did not pushdown the slice
+    df = (
+        pl.Series("x", pl.arange(0, 2**16 - 1, eager=True, dtype=pl.UInt16) % 2**15)
+    ).to_frame()
+
+    result = df.lazy().join(df.lazy(), how="cross", suffix="_").slice(-5, 10).collect()
+    expected = pl.DataFrame(
+        {
+            "x": [32766, 32766, 32766, 32766, 32766],
+            "x_": [32762, 32763, 32764, 32765, 32766],
+        },
+        schema={"x": pl.UInt16, "x_": pl.UInt16},
+    )
+    assert_frame_equal(result, expected)
+
+    result = df.lazy().join(df.lazy(), how="cross", suffix="_").slice(2, 10).collect()
+    expected = pl.DataFrame(
+        {
+            "x": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+            "x_": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
+        },
+        schema={"x": pl.UInt16, "x_": pl.UInt16},
+    )
+    assert_frame_equal(result, expected)
diff --git a/py-polars/tests/db-benchmark/lazy_vs_eager.py b/py-polars/tests/db-benchmark/lazy_vs_eager.py
deleted file mode 100644
index 60e188ee46cd5..0000000000000
--- a/py-polars/tests/db-benchmark/lazy_vs_eager.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import polars as pl
-
-path = "G1_1e7_1e2_5_0.csv"
-predicate = pl.col("v2") < 5
-
-shape_eager = pl.read_csv(path).filter(predicate).shape
-
-shape_lazy = (pl.scan_csv(path).filter(predicate)).collect().shape
-assert shape_lazy == shape_eager
diff --git a/py-polars/tests/db-benchmark/various.py b/py-polars/tests/db-benchmark/various.py
deleted file mode 100644
index 3373b466d692f..0000000000000
--- a/py-polars/tests/db-benchmark/various.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# may contain many things that seemed to go wrong at scale
-
-import time
-from typing import cast
-
-import numpy as np
-
-import polars as pl
-from polars.testing import assert_frame_equal
-
-# https://github.com/pola-rs/polars/issues/1942
-t0 = time.time()
-pl.repeat(float("nan"), 2 << 12).sort()
-assert (time.time() - t0) < 1
-
-# test mean overflow issues
-np.random.seed(1)
-mean = 769.5607652
-df = pl.DataFrame(np.random.randint(500, 1040, 5000000), schema=["value"])
-assert np.isclose(df.with_columns(pl.mean("value"))[0, 0], mean)
-assert np.isclose(
-    df.with_columns(pl.col("value").cast(pl.Int32)).with_columns(pl.mean("value"))[
-        0, 0
-    ],
-    mean,
-)
-
-calculated_mean = (
-    df.with_columns(pl.col("value").cast(pl.Int32)).get_column("value").mean()
-)
-
-if calculated_mean is not None:
-    assert np.isclose(calculated_mean, mean)
-else:
-    raise AssertionError("mean is None")
-
-# https://github.com/pola-rs/polars/issues/2850
-df = pl.DataFrame(
-    {
-        "id": [
-            130352432,
-            130352277,
-            130352611,
-            130352833,
-            130352305,
-            130352258,
-            130352764,
-            130352475,
-            130352368,
-            130352346,
-        ]
-    }
-)
-
-minimum = 130352258
-maximum = 130352833.0
-
-for _ in range(10):
-    permuted = df.sample(frac=1.0, seed=0)
-    computed = permuted.select(
-        [pl.col("id").min().alias("min"), pl.col("id").max().alias("max")]
-    )
-    assert cast(int, computed[0, "min"]) == minimum
-    assert cast(float, computed[0, "max"]) == maximum
-
-
-def test_windows_not_cached() -> None:
-    ldf = (
-        pl.DataFrame(
-            [
-                pl.Series("key", ["a", "a", "b", "b"]),
-                pl.Series("val", [2, 2, 1, 3]),
-            ]
-        )
-        .lazy()
-        .filter(
-            (pl.col("key").cumcount().over("key") == 0)
-            | (pl.col("val").shift(1).over("key").is_not_null())
-            | (pl.col("val") != pl.col("val").shift(1).over("key"))
-        )
-    )
-    # this might fail if they are cached
-    for _ in range(1000):
-        ldf.collect()
-
-
-def test_cross_join() -> None:
-    # triggers > 100 rows implementation
-    # https://github.com/pola-rs/polars/blob/5f5acb2a523ce01bc710768b396762b8e69a9e07/polars/polars-core/src/frame/cross_join.rs#L34
-    df1 = pl.DataFrame({"col1": ["a"], "col2": ["d"]})
-    df2 = pl.DataFrame({"frame2": pl.arange(0, 100, eager=True)})
-    out = df2.join(df1, how="cross")
-    df2 = pl.DataFrame({"frame2": pl.arange(0, 101, eager=True)})
-    assert_frame_equal(df2.join(df1, how="cross").slice(0, 100), out)
-
-
-def test_cross_join_slice_pushdown() -> None:
-    # this will likely go out of memory if we did not pushdown the slice
-    df = pl.DataFrame(
-        [
-            pl.Series("x", pl.arange(0, 2**16 - 1, eager=True) % 2**15).cast(
-                pl.UInt16
-            )
-        ]
-    )
-
-    assert df.lazy().join(df.lazy(), how="cross", suffix="_").slice(
-        -5, 10
-    ).collect().to_dict(False) == {
-        "x": [32766, 32766, 32766, 32766, 32766],
-        "x_": [32762, 32763, 32764, 32765, 32766],
-    }
-
-    assert df.lazy().join(df.lazy(), how="cross", suffix="_").slice(
-        2, 10
-    ).collect().to_dict(False) == {
-        "x": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        "x_": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
-    }
-
-
-if __name__ == "__main__":
-    test_windows_not_cached()
-    test_cross_join()
diff --git a/py-polars/tests/docs/__init__.py b/py-polars/tests/docs/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/py-polars/tests/docs/run_doc_examples.py b/py-polars/tests/docs/run_doctest.py
similarity index 100%
rename from py-polars/tests/docs/run_doc_examples.py
rename to py-polars/tests/docs/run_doctest.py
diff --git a/py-polars/tests/parametric/test_testing.py b/py-polars/tests/parametric/test_testing.py
index 8b94130d854c5..3fbb6fb0f0362 100644
--- a/py-polars/tests/parametric/test_testing.py
+++ b/py-polars/tests/parametric/test_testing.py
@@ -204,6 +204,7 @@ def finite_float(value: Any) -> bool:
            assert all(finite_float(val) for val in df[col].to_list())
 
 
+@pytest.mark.hypothesis()
 def test_invalid_arguments() -> None:
     for invalid_probability in (-1.0, +2.0):
         with pytest.raises(InvalidArgument, match="between 0.0 and 1.0"):
diff --git a/py-polars/tests/unit/io/conftest.py b/py-polars/tests/unit/io/conftest.py
index 3280b937ce4fe..b488a9d29c443 100644
--- a/py-polars/tests/unit/io/conftest.py
+++ b/py-polars/tests/unit/io/conftest.py
@@ -8,5 +8,5 @@
 
 @pytest.fixture()
 def io_files_path() -> Path:
-    current_dir = os.path.abspath(os.path.dirname(__file__))
+    current_dir = os.path.dirname(__file__)
     return Path(current_dir) / "files"