From 2dee8fad33c306c9fe34f1c0540149f76c43d424 Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer
Date: Thu, 9 Feb 2023 09:55:09 +0100
Subject: [PATCH] test(python): Reorganize benchmark test folder (#6695)

---
 .github/workflows/benchmark.yaml              |  36 ++---
 .github/workflows/test-python.yaml            |   6 +-
 CONTRIBUTING.md                               |   2 +-
 py-polars/Makefile                            |   6 +-
 py-polars/pyproject.toml                      |  57 ++++---
 py-polars/tests/README.md                     | 117 ++++++++++++++
 py-polars/tests/benchmark/__init__.py         |   0
 .../groupby-datagen.R                         |   0
 .../run_h2oai_benchmark.py}                   |  23 ++-
 py-polars/tests/benchmark/test_release.py     | 148 ++++++++++++++++++
 py-polars/tests/db-benchmark/lazy_vs_eager.py |   9 --
 py-polars/tests/db-benchmark/various.py       | 124 ---------------
 py-polars/tests/docs/__init__.py              |   0
 .../{run_doc_examples.py => run_doctest.py}   |   0
 py-polars/tests/parametric/test_testing.py    |   1 +
 py-polars/tests/unit/io/conftest.py           |   2 +-
 16 files changed, 335 insertions(+), 196 deletions(-)
 create mode 100644 py-polars/tests/README.md
 create mode 100644 py-polars/tests/benchmark/__init__.py
 rename py-polars/tests/{db-benchmark => benchmark}/groupby-datagen.R (100%)
 rename py-polars/tests/{db-benchmark/main.py => benchmark/run_h2oai_benchmark.py} (92%)
 create mode 100644 py-polars/tests/benchmark/test_release.py
 delete mode 100644 py-polars/tests/db-benchmark/lazy_vs_eager.py
 delete mode 100644 py-polars/tests/db-benchmark/various.py
 create mode 100644 py-polars/tests/docs/__init__.py
 rename py-polars/tests/docs/{run_doc_examples.py => run_doctest.py} (100%)

diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml
index 3605f5324eba3..576e18e1419c1 100644
--- a/.github/workflows/benchmark.yaml
+++ b/.github/workflows/benchmark.yaml
@@ -5,7 +5,7 @@ on:
     paths:
       - "polars/**"
      - "Cargo.toml"
-      - "py-polars/tests/db-benchmark/**"
+      - "py-polars/tests/benchmark/**"
      - ".github/workflows/benchmark.yaml"
 
 concurrency:
@@ -21,9 +21,9 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
-          python-version: "3.10"
-          cache: "pip"
-          cache-dependency-path: "py-polars/requirements-dev.txt"
+          python-version: '3.11'
+          cache: pip
+          cache-dependency-path: py-polars/requirements-dev.txt
 
      - name: Create virtual environment
        working-directory: py-polars
@@ -43,9 +43,9 @@ jobs:
          r-version: "3.5.3"
 
      - name: Generate data
-        working-directory: py-polars/tests/db-benchmark
+        working-directory: py-polars/tests/benchmark
        run: |
-          Rscript -e 'install.packages("data.table",repos = "https://cloud.r-project.org")'
+          Rscript -e 'install.packages("data.table", repos="https://cloud.r-project.org")'
          Rscript groupby-datagen.R 1e7 1e2 5 0
 
      - name: Set up Rust
@@ -58,7 +58,7 @@ jobs:
        with:
          workspaces: py-polars
 
-      - name: Install Polars
+      - name: Install Polars release build
        env:
          RUSTFLAGS: -C embed-bitcode
        working-directory: py-polars
@@ -66,18 +66,14 @@ jobs:
          source activate
          maturin develop --release -- -C codegen-units=8 -C lto=thin -C target-cpu=native
 
-      - name: Run benchmark tests - lazy versus eager
-        working-directory: py-polars/tests/db-benchmark
-        run: python lazy_vs_eager.py
+      - name: Run H2O AI database benchmark - on strings
+        working-directory: py-polars/tests/benchmark
+        run: python run_h2oai_benchmark.py on_strings
 
-      - name: Run benchmark tests - various
-        working-directory: py-polars/tests/db-benchmark
-        run: python various.py
+      - name: Run H2O AI database benchmark - on categoricals
+        working-directory: py-polars/tests/benchmark
+        run: python run_h2oai_benchmark.py
 
-      - name: Run benchmark tests - on strings
-        working-directory: py-polars/tests/db-benchmark
-        run: python main.py on_strings
-
-      - name: Run benchmark tests - on categoricals
-        working-directory: py-polars/tests/db-benchmark
-        run: python main.py
+      - name: Run various benchmark tests
+        working-directory: py-polars
+        run: pytest -m benchmark --durations 0 -v
diff --git a/.github/workflows/test-python.yaml b/.github/workflows/test-python.yaml
index 3c10b57924ffd..13f944a882da9 100644
--- a/.github/workflows/test-python.yaml
+++ b/.github/workflows/test-python.yaml
@@ -65,10 +65,10 @@ jobs:
          maturin develop
 
      - name: Run tests and report coverage
-        run: pytest --cov -n auto -m "slow or not slow"
+        run: pytest --cov -n auto -m "not benchmark"
 
      - name: Run doctests
-        run: python tests/docs/run_doc_examples.py
+        run: python tests/docs/run_doctest.py
 
      - name: Check import without optional dependencies
        run: |
@@ -129,7 +129,7 @@ jobs:
          pip install target/wheels/polars-*.whl
 
      - name: Run tests
-        run: pytest -n auto -m "slow or not slow"
+        run: pytest -n auto -m "not benchmark"
 
      - name: Check import without optional dependencies
        run: |
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 97867339ec556..92726d2dfa3d9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -117,7 +117,7 @@ Create a new git branch from the `master` branch in your local repository, and s
 The Rust codebase is located in the `polars` directory, while the Python codebase is located in the `py-polars` directory.
 Both directories contain a `Makefile` with helpful commands. Most notably:
 
-- `make test` to run the test suite
+- `make test` to run the test suite (see the [test suite docs](/py-polars/tests/README.md) for more info)
 - `make pre-commit` to run autoformatting and linting
 
 Note that your work cannot be merged if these checks fail!
diff --git a/py-polars/Makefile b/py-polars/Makefile
index 6678e01fe52bb..3fe607eedcc3a 100644
--- a/py-polars/Makefile
+++ b/py-polars/Makefile
@@ -46,16 +46,16 @@ test: venv build ## Run fast unittests
 
 .PHONY: doctest
 doctest: venv build ## Run doctests
-	$(VENV_BIN)/python tests/docs/run_doc_examples.py
+	$(VENV_BIN)/python tests/docs/run_doctest.py
 
 .PHONY: test-all
 test-all: venv build ## Run all tests
 	$(VENV_BIN)/pytest -n auto -m "slow or not slow"
-	$(VENV_BIN)/python tests/docs/run_doc_examples.py
+	$(VENV_BIN)/python tests/docs/run_doctest.py
 
 .PHONY: coverage
 coverage: venv build ## Run tests and report coverage
-	$(VENV_BIN)/pytest --cov -n auto -m "slow or not slow"
+	$(VENV_BIN)/pytest --cov -n auto -m "not benchmark"
 
 .PHONY: clean
 clean: ## Clean up caches and build artifacts
diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml
index 6f7adda985c67..b973529a0e8ca 100644
--- a/py-polars/pyproject.toml
+++ b/py-polars/pyproject.toml
@@ -95,31 +95,6 @@ module = ["polars.*"]
 # thin wrapper around the PyO3 api to start with.
 warn_return_any = false
 
-[tool.pytest.ini_options]
-addopts = [
-    "--import-mode=importlib",
-    "-m not slow and not hypothesis",  # Default to running fast tests only. To run ALL tests, run: pytest -m ""
-]
-markers = [
-    "slow: Tests with a longer than average runtime.",
-]
-
-[tool.coverage.run]
-source = ["polars"]
-branch = true
-
-[tool.coverage.report]
-fail_under = 85
-skip_covered = true
-show_missing = true
-exclude_lines = [
-    "pragma: no cover",
-    "@overload",
-    "except ImportError",
-    "if TYPE_CHECKING:",
-    "from typing_extensions import ",
-]
-
 [tool.ruff]
 target-version = "py37"
 line-length = 88
@@ -178,7 +153,31 @@ ban-relative-imports = "all"
 
 [tool.ruff.per-file-ignores]
 "polars/datatypes.py" = ["B019"]
-"tests/*/*.py" = ["D100", "D103"]
-"tests/docs/run_doc_examples.py" = ["D101", "D102", "D103"]
-"tests/parametric/__init__.py" = ["D104"]
-"tests/slow/__init__.py" = ["D104"]
+"tests/**/*.py" = ["D100", "D103"]
+
+[tool.pytest.ini_options]
+addopts = [
+    "--import-mode=importlib",
+    # Default to running fast tests only. To run ALL tests, run: pytest -m ""
+    "-m not slow and not hypothesis and not benchmark",
+]
+markers = [
+    "slow: Tests with a longer than average runtime.",
+    "benchmark: Tests that should be run on a Polars release build.",
+]
+
+[tool.coverage.run]
+source = ["polars"]
+branch = true
+
+[tool.coverage.report]
+fail_under = 85
+skip_covered = true
+show_missing = true
+exclude_lines = [
+    "pragma: no cover",
+    "@overload",
+    "except ImportError",
+    "if TYPE_CHECKING:",
+    "from typing_extensions import ",
+]
diff --git a/py-polars/tests/README.md b/py-polars/tests/README.md
new file mode 100644
index 0000000000000..30286980ff196
--- /dev/null
+++ b/py-polars/tests/README.md
@@ -0,0 +1,117 @@
+# Polars test suite
+
+This folder contains the main Polars test suite. This document contains some information on the various components of the test suite, as well as guidelines for writing new tests.
+
+The test suite contains four main components, each confined to its own folder: unit tests, parametric tests, benchmark tests, and doctests.
+
+Note that this test suite is indirectly responsible for testing Rust Polars as well. The Rust test suite is kept small to reduce compilation times. A lot of the Rust functionality is tested here instead.
+
+## Table of contents
+
+- [Unit tests](#unit-tests)
+- [Parametric tests](#parametric-tests)
+- [Doctests](#doctests)
+- [Benchmark tests](#benchmark-tests)
+
+## Unit tests
+
+The `unit` folder contains all regular unit tests.
+These tests make sure all Polars functionality works as intended.
+
+### Running unit tests
+
+Run unit tests by running `make test` from the `py-polars` folder. This will compile the Rust bindings and then run the unit tests.
+
+If you are only working on the Python code, you can avoid recompiling every time by simply running `pytest` instead.
+
+By default, slow tests are skipped. Slow tests are marked as such using a [custom pytest marker](https://docs.pytest.org/en/latest/example/markers.html).
+If you wish to run slow tests, run `pytest -m slow`.
+Or run `pytest -m ""` to run _all_ tests, regardless of marker.
+
+Tests can be run in parallel using [`pytest-xdist`](https://pytest-xdist.readthedocs.io/en/latest/). Run `pytest -n auto` to parallelize your test run.
+
+### Writing unit tests
+
+Whenever you add new functionality, you should also add matching unit tests.
+Add your tests to the appropriate test module in the `unit` folder.
+Some guidelines to keep in mind:
+
+- Try to fully cover all possible inputs and edge cases you can think of.
+- Utilize pytest tools like [`fixture`](https://docs.pytest.org/en/latest/explanation/fixtures.html) and [`parametrize`](https://docs.pytest.org/en/latest/how-to/parametrize.html) where appropriate (see the example after this list).
+- Since many tests will require some data to be defined first, it can be efficient to run multiple checks in a single test. This can also be addressed using pytest fixtures.
+- Unit tests should not depend on external factors, otherwise test parallelization will break.
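+
+For example, a parametrized unit test might look something like the sketch below (the test and the chosen cases are illustrative only, not an existing test in the suite):
+
+```python
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+
+@pytest.mark.parametrize(
+    ("values", "expected_sum"),
+    [
+        ([1, 2, 3], 6),
+        ([0, 0, 0], 0),
+        ([-1, 1], 0),
+    ],
+)
+def test_series_sum(values: list[int], expected_sum: int) -> None:
+    # One test function covers several inputs, including edge cases.
+    assert pl.Series("a", values).sum() == expected_sum
+```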
+
+## Parametric tests
+
+The `parametric` folder contains parametric tests written using the [Hypothesis](https://hypothesis.readthedocs.io/) framework.
+These tests are intended to find and test edge cases by generating many random datapoints.
+
+### Running parametric tests
+
+Run parametric tests by running `pytest -m hypothesis`.
+
+Note that parametric tests are excluded by default when running `pytest`.
+You must explicitly specify `-m hypothesis` to run them.
+
+These tests _will_ be included when calculating test coverage, and will also be run as part of the `make test-all` command.
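+
+As a purely illustrative sketch of what such a test can look like (the test name is made up, and a plain Hypothesis strategy is used rather than a Polars-specific one):
+
+```python
+from __future__ import annotations
+
+from hypothesis import given
+from hypothesis import strategies as st
+
+import polars as pl
+
+
+@given(st.lists(st.integers(min_value=-1000, max_value=1000), min_size=1))
+def test_series_sum_matches_python_sum(values: list[int]) -> None:
+    # Hypothesis generates many random inputs, hunting for edge cases.
+    assert pl.Series("a", values).sum() == sum(values)
+```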
+
+## Doctests
+
+The `docs` folder contains a script for running [`doctest`](https://docs.python.org/3/library/doctest.html).
+This folder does not contain any actual tests - rather, the script checks all docstrings in the Polars package for `Examples` sections, runs the code examples, and verifies the output.
+
+The aim of running `doctest` is to make sure the `Examples` sections in our docstrings are valid and remain up-to-date with code changes.
+
+### Running `doctest`
+
+To run the `doctest` module, run `make doctest` from the `py-polars` folder.
+You can also run the script directly from your virtual environment.
+
+Note that doctests are _not_ run using pytest. While pytest does have the capability to run doc examples, configuration options are too limited for our purposes.
+
+Doctests will _not_ count towards test coverage. They are not a substitute for unit tests, but are rather intended to show users how the Polars API is meant to be used.
+
+### Writing doc examples
+
+Almost all classes/methods/functions that are part of Polars' public API should include code examples in their docstring.
+These examples help users understand basic usage and allow us to illustrate more advanced concepts as well.
+Some guidelines for writing a good docstring `Examples` section:
+
+- Start with a minimal example that showcases the default functionality.
+- Showcase the effect of its parameters.
+- Showcase any special interactions when combined with other code.
+- Keep it succinct and avoid multiple examples showcasing the same thing.
+
+There are many great docstring examples already; just check other code if you need inspiration!
+
+In addition to the [regular options](https://docs.python.org/3/library/doctest.html#option-flags) available when writing doctests, the script configuration allows for a new `IGNORE_RESULT` directive. Use this directive if you want to ensure the code runs, but the output may be random by design or not interesting to check.
+
+```python
+>>> df.sample(n=2)  # doctest: +IGNORE_RESULT
+```
+
+## Benchmark tests
+
+The `benchmark` folder contains code for running the [H2O AI database benchmark](https://github.com/h2oai/db-benchmark).
+It also contains various other benchmark tests.
+
+The aim of this part of the test suite is to spot performance regressions in the code, and to verify that Polars functionality works as expected when run on a release build or at a larger scale.
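+
+For illustration, a benchmark test is an ordinary pytest test that carries the `benchmark` marker; the test below is a made-up sketch, not one of the tests in this folder:
+
+```python
+import pytest
+
+import polars as pl
+
+
+@pytest.mark.benchmark()
+def test_sort_large_frame() -> None:
+    # Only selected when running: pytest -m benchmark
+    df = pl.DataFrame({"x": list(range(1_000_000, 0, -1))})
+    assert df.sort("x").height == 1_000_000
+```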
+
+### Running the H2O AI database benchmark
+
+The benchmark is somewhat cumbersome to run locally. You must first generate the dataset using the R script provided in the `benchmark` folder. Afterwards, run the Python script to execute the benchmark.
+
+Make sure to install a release build of Polars before running the benchmark to guarantee the best results.
+
+Refer to the [benchmark workflow](/.github/workflows/benchmark.yaml) for detailed steps.
+
+### Running other benchmark tests
+
+The other benchmark tests are run using pytest.
+Run `pytest -m benchmark --durations 0 -v` to run these tests and report run duration.
+
+Note that benchmark tests are excluded by default when running `pytest`.
+You must explicitly specify `-m benchmark` to run them.
+They will also be excluded when calculating test coverage.
+
+These tests _will_ be run as part of the `make test-all` command.
diff --git a/py-polars/tests/benchmark/__init__.py b/py-polars/tests/benchmark/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/py-polars/tests/db-benchmark/groupby-datagen.R b/py-polars/tests/benchmark/groupby-datagen.R
similarity index 100%
rename from py-polars/tests/db-benchmark/groupby-datagen.R
rename to py-polars/tests/benchmark/groupby-datagen.R
diff --git a/py-polars/tests/db-benchmark/main.py b/py-polars/tests/benchmark/run_h2oai_benchmark.py
similarity index 92%
rename from py-polars/tests/db-benchmark/main.py
rename to py-polars/tests/benchmark/run_h2oai_benchmark.py
index 710b1e7f5ab78..f691ef9cda5e6 100644
--- a/py-polars/tests/db-benchmark/main.py
+++ b/py-polars/tests/benchmark/run_h2oai_benchmark.py
@@ -1,4 +1,13 @@
-# type: ignore
+"""
+Contains code for running the H2O AI database benchmark.
+
+First, run the R script to generate a dataset with set characteristics.
+Then run this script to get the runtime of certain queries.
+
+See:
+https://h2oai.github.io/db-benchmark/
+
+"""
 import sys
 import time
 
@@ -9,7 +18,7 @@
 
 print(pl.__version__)
 
-x = pl.read_csv(
+df = pl.read_csv(
     "G1_1e7_1e2_5_0.csv",
     dtypes={
         "id4": pl.Int32,
@@ -23,8 +32,8 @@
 ON_STRINGS = sys.argv.pop() == "on_strings"
 
 if not ON_STRINGS:
-    x = x.with_columns([pl.col(["id1", "id2", "id3"]).cast(pl.Categorical)])
-df = x.clone()
+    df = df.with_columns([pl.col(["id1", "id2", "id3"]).cast(pl.Categorical)])
+df = df.clone()
 x = df.lazy()
 
 t00 = time.time()
@@ -310,6 +319,8 @@
 assert out["id6"].to_list() == [2137755425]
 assert np.isclose(out["v3"].to_list(), 4.7040828499563754e8).all()
 
-if not ON_STRINGS and total_time > 12:
-    print("query took longer than 12s, may be noise")
+# Fail or pass the benchmark based on runtime. Used in the Benchmark CI workflow.
+threshold = 18 if ON_STRINGS else 12
+if total_time > threshold:
+    print(f"query took longer than {threshold}s, may be noise")
     exit(1)
diff --git a/py-polars/tests/benchmark/test_release.py b/py-polars/tests/benchmark/test_release.py
new file mode 100644
index 0000000000000..21c68b12ea516
--- /dev/null
+++ b/py-polars/tests/benchmark/test_release.py
@@ -0,0 +1,148 @@
+"""
+Various benchmark tests.
+
+Tests in this module will be run in the CI using a release build of Polars.
+
+To run these tests: pytest -m benchmark
+"""
+import os
+import time
+from pathlib import Path
+from typing import cast
+
+import numpy as np
+import pytest
+
+import polars as pl
+from polars.testing import assert_frame_equal
+
+# Mark all tests in this module as benchmark tests
+pytestmark = pytest.mark.benchmark
+
+
+@pytest.mark.skipif(
+    not (Path(os.path.dirname(__file__)) / "G1_1e7_1e2_5_0.csv").is_file(),
+    reason="Dataset must be generated before running this test.",
+)
+def test_read_scan_large_csv() -> None:
+    filename = "G1_1e7_1e2_5_0.csv"
+    path = Path(os.path.dirname(__file__)) / filename
+
+    predicate = pl.col("v2") < 5
+
+    shape_eager = pl.read_csv(path).filter(predicate).shape
+    shape_lazy = (pl.scan_csv(path).filter(predicate)).collect().shape
+
+    assert shape_lazy == shape_eager
+
+
+def test_sort_nan_1942() -> None:
+    # https://github.com/pola-rs/polars/issues/1942
+    t0 = time.time()
+    pl.repeat(float("nan"), 2 << 12, eager=True).sort()
+    assert (time.time() - t0) < 1
+
+
+def test_mean_overflow() -> None:
+    np.random.seed(1)
+    expected = 769.5607652
+
+    df = pl.DataFrame(np.random.randint(500, 1040, 5000000), schema=["value"])
+
+    result = df.with_columns(pl.mean("value"))[0, 0]
+    assert np.isclose(result, expected)
+
+    result = df.with_columns(pl.col("value").cast(pl.Int32)).with_columns(
+        pl.mean("value")
+    )[0, 0]
+    assert np.isclose(result, expected)
+
+    result = df.with_columns(pl.col("value").cast(pl.Int32)).get_column("value").mean()
+    assert np.isclose(result, expected)
+
+
+def test_min_max_2850() -> None:
+    # https://github.com/pola-rs/polars/issues/2850
+    df = pl.DataFrame(
+        {
+            "id": [
+                130352432,
+                130352277,
+                130352611,
+                130352833,
+                130352305,
+                130352258,
+                130352764,
+                130352475,
+                130352368,
+                130352346,
+            ]
+        }
+    )
+
+    minimum = 130352258
+    maximum = 130352833.0
+
+    for _ in range(10):
+        permuted = df.sample(frac=1.0, seed=0)
+        computed = permuted.select(
+            [pl.col("id").min().alias("min"), pl.col("id").max().alias("max")]
+        )
+        assert cast(int, computed[0, "min"]) == minimum
+        assert cast(float, computed[0, "max"]) == maximum
+
+
+def test_windows_not_cached() -> None:
+    ldf = (
+        pl.DataFrame(
+            [
+                pl.Series("key", ["a", "a", "b", "b"]),
+                pl.Series("val", [2, 2, 1, 3]),
+            ]
+        )
+        .lazy()
+        .filter(
+            (pl.col("key").cumcount().over("key") == 0)
+            | (pl.col("val").shift(1).over("key").is_not_null())
+            | (pl.col("val") != pl.col("val").shift(1).over("key"))
+        )
+    )
+    # this might fail if they are cached
+    for _ in range(1000):
+        ldf.collect()
+
+
+def test_cross_join() -> None:
+    # triggers > 100 rows implementation
+    # https://github.com/pola-rs/polars/blob/5f5acb2a523ce01bc710768b396762b8e69a9e07/polars/polars-core/src/frame/cross_join.rs#L34
+    df1 = pl.DataFrame({"col1": ["a"], "col2": ["d"]})
+    df2 = pl.DataFrame({"frame2": pl.arange(0, 100, eager=True)})
+    out = df2.join(df1, how="cross")
+    df2 = pl.DataFrame({"frame2": pl.arange(0, 101, eager=True)})
+    assert_frame_equal(df2.join(df1, how="cross").slice(0, 100), out)
+
+
+def test_cross_join_slice_pushdown() -> None:
+    # this will likely go out of memory if we did not pushdown the slice
+    df = (
+        pl.Series("x", pl.arange(0, 2**16 - 1, eager=True, dtype=pl.UInt16) % 2**15)
+    ).to_frame()
+
+    result = df.lazy().join(df.lazy(), how="cross", suffix="_").slice(-5, 10).collect()
+    expected = pl.DataFrame(
+        {
+            "x": [32766, 32766, 32766, 32766, 32766],
+            "x_": [32762, 32763, 32764, 32765, 32766],
+        },
+        schema={"x": pl.UInt16, "x_": pl.UInt16},
+    )
+    assert_frame_equal(result, expected)
+
+    result = df.lazy().join(df.lazy(), how="cross", suffix="_").slice(2, 10).collect()
+    expected = pl.DataFrame(
+        {
+            "x": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+            "x_": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
+        },
+        schema={"x": pl.UInt16, "x_": pl.UInt16},
+    )
+    assert_frame_equal(result, expected)
diff --git a/py-polars/tests/db-benchmark/lazy_vs_eager.py b/py-polars/tests/db-benchmark/lazy_vs_eager.py
deleted file mode 100644
index 60e188ee46cd5..0000000000000
--- a/py-polars/tests/db-benchmark/lazy_vs_eager.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import polars as pl
-
-path = "G1_1e7_1e2_5_0.csv"
-predicate = pl.col("v2") < 5
-
-shape_eager = pl.read_csv(path).filter(predicate).shape
-
-shape_lazy = (pl.scan_csv(path).filter(predicate)).collect().shape
-assert shape_lazy == shape_eager
diff --git a/py-polars/tests/db-benchmark/various.py b/py-polars/tests/db-benchmark/various.py
deleted file mode 100644
index 3373b466d692f..0000000000000
--- a/py-polars/tests/db-benchmark/various.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# may contain many things that seemed to go wrong at scale
-
-import time
-from typing import cast
-
-import numpy as np
-
-import polars as pl
-from polars.testing import assert_frame_equal
-
-# https://github.com/pola-rs/polars/issues/1942
-t0 = time.time()
-pl.repeat(float("nan"), 2 << 12).sort()
-assert (time.time() - t0) < 1
-
-# test mean overflow issues
-np.random.seed(1)
-mean = 769.5607652
-df = pl.DataFrame(np.random.randint(500, 1040, 5000000), schema=["value"])
-assert np.isclose(df.with_columns(pl.mean("value"))[0, 0], mean)
-assert np.isclose(
-    df.with_columns(pl.col("value").cast(pl.Int32)).with_columns(pl.mean("value"))[
-        0, 0
-    ],
-    mean,
-)
-
-calculated_mean = (
-    df.with_columns(pl.col("value").cast(pl.Int32)).get_column("value").mean()
-)
-
-if calculated_mean is not None:
-    assert np.isclose(calculated_mean, mean)
-else:
-    raise AssertionError("mean is None")
-
-# https://github.com/pola-rs/polars/issues/2850
-df = pl.DataFrame(
-    {
-        "id": [
-            130352432,
-            130352277,
-            130352611,
-            130352833,
-            130352305,
-            130352258,
-            130352764,
-            130352475,
-            130352368,
-            130352346,
-        ]
-    }
-)
-
-minimum = 130352258
-maximum = 130352833.0
-
-for _ in range(10):
-    permuted = df.sample(frac=1.0, seed=0)
-    computed = permuted.select(
-        [pl.col("id").min().alias("min"), pl.col("id").max().alias("max")]
-    )
-    assert cast(int, computed[0, "min"]) == minimum
-    assert cast(float, computed[0, "max"]) == maximum
-
-
-def test_windows_not_cached() -> None:
-    ldf = (
-        pl.DataFrame(
-            [
-                pl.Series("key", ["a", "a", "b", "b"]),
-                pl.Series("val", [2, 2, 1, 3]),
-            ]
-        )
-        .lazy()
-        .filter(
-            (pl.col("key").cumcount().over("key") == 0)
-            | (pl.col("val").shift(1).over("key").is_not_null())
-            | (pl.col("val") != pl.col("val").shift(1).over("key"))
-        )
-    )
-    # this might fail if they are cached
-    for _ in range(1000):
-        ldf.collect()
-
-
-def test_cross_join() -> None:
-    # triggers > 100 rows implementation
-    # https://github.com/pola-rs/polars/blob/5f5acb2a523ce01bc710768b396762b8e69a9e07/polars/polars-core/src/frame/cross_join.rs#L34
-    df1 = pl.DataFrame({"col1": ["a"], "col2": ["d"]})
-    df2 = pl.DataFrame({"frame2": pl.arange(0, 100, eager=True)})
-    out = df2.join(df1, how="cross")
-    df2 = pl.DataFrame({"frame2": pl.arange(0, 101, eager=True)})
-    assert_frame_equal(df2.join(df1, how="cross").slice(0, 100), out)
-
-
-def test_cross_join_slice_pushdown() -> None:
-    # this will likely go out of memory if we did not pushdown the slice
-    df = pl.DataFrame(
-        [
-            pl.Series("x", pl.arange(0, 2**16 - 1, eager=True) % 2**15).cast(
-                pl.UInt16
-            )
-        ]
-    )
-
-    assert df.lazy().join(df.lazy(), how="cross", suffix="_").slice(
-        -5, 10
-    ).collect().to_dict(False) == {
-        "x": [32766, 32766, 32766, 32766, 32766],
-        "x_": [32762, 32763, 32764, 32765, 32766],
-    }
-
-    assert df.lazy().join(df.lazy(), how="cross", suffix="_").slice(
-        2, 10
-    ).collect().to_dict(False) == {
-        "x": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        "x_": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
-    }
-
-
-if __name__ == "__main__":
-    test_windows_not_cached()
-    test_cross_join()
diff --git a/py-polars/tests/docs/__init__.py b/py-polars/tests/docs/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/py-polars/tests/docs/run_doc_examples.py b/py-polars/tests/docs/run_doctest.py
similarity index 100%
rename from py-polars/tests/docs/run_doc_examples.py
rename to py-polars/tests/docs/run_doctest.py
diff --git a/py-polars/tests/parametric/test_testing.py b/py-polars/tests/parametric/test_testing.py
index 8b94130d854c5..3fbb6fb0f0362 100644
--- a/py-polars/tests/parametric/test_testing.py
+++ b/py-polars/tests/parametric/test_testing.py
@@ -204,6 +204,7 @@ def finite_float(value: Any) -> bool:
            assert all(finite_float(val) for val in df[col].to_list())
 
 
+@pytest.mark.hypothesis()
 def test_invalid_arguments() -> None:
     for invalid_probability in (-1.0, +2.0):
         with pytest.raises(InvalidArgument, match="between 0.0 and 1.0"):
diff --git a/py-polars/tests/unit/io/conftest.py b/py-polars/tests/unit/io/conftest.py
index 3280b937ce4fe..b488a9d29c443 100644
--- a/py-polars/tests/unit/io/conftest.py
+++ b/py-polars/tests/unit/io/conftest.py
@@ -8,5 +8,5 @@
 
 @pytest.fixture()
 def io_files_path() -> Path:
-    current_dir = os.path.abspath(os.path.dirname(__file__))
+    current_dir = os.path.dirname(__file__)
     return Path(current_dir) / "files"