From 9cb01d637687f969676adb92821ddfff5f45f6b6 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 25 Jul 2024 08:19:57 -0700 Subject: [PATCH] test(benchmark): remove unnecessary `date` call in benchmark (#9693) --- .github/workflows/ibis-main.yml | 4 +- ibis/tests/benchmarks/test_benchmarks.py | 62 +++++++++++------------- 2 files changed, 30 insertions(+), 36 deletions(-) diff --git a/.github/workflows/ibis-main.yml b/.github/workflows/ibis-main.yml index f696420dc143..16ffbc2ab553 100644 --- a/.github/workflows/ibis-main.yml +++ b/.github/workflows/ibis-main.yml @@ -94,11 +94,11 @@ jobs: - name: run all core tests and run benchmarks once parallel if: matrix.os != 'windows-latest' - run: just ci-check -m "'core or benchmarks'" --numprocesses auto -rs + run: just ci-check -m "'core or benchmark'" --numprocesses auto -rfEs - name: run all core tests and run benchmarks once serial if: matrix.os == 'windows-latest' - run: just ci-check -m "'core or benchmarks'" -rs + run: just ci-check -m "'core or benchmark'" -rfEs - name: upload code coverage if: success() diff --git a/ibis/tests/benchmarks/test_benchmarks.py b/ibis/tests/benchmarks/test_benchmarks.py index 4faf306dd0e8..f8d8e1f4bbb4 100644 --- a/ibis/tests/benchmarks/test_benchmarks.py +++ b/ibis/tests/benchmarks/test_benchmarks.py @@ -1,6 +1,7 @@ from __future__ import annotations import copy +import datetime import functools import inspect import itertools @@ -8,14 +9,9 @@ import os import random import string -from datetime import datetime -from operator import attrgetter, itemgetter -import numpy as np -import pandas as pd import pytest import pytz -from packaging.version import parse as vparse from pytest import param import ibis @@ -200,8 +196,17 @@ def test_compile(benchmark, module, expr_fn, t, base, large_expr): pytest.skip(str(e)) -@pytest.fixture(scope="module") -def pt(): +@pytest.fixture +def con(): + pytest.importorskip("duckdb") + return ibis.duckdb.connect() + + +@pytest.fixture +def pt(con): + np = pytest.importorskip("numpy") + pd = pytest.importorskip("pandas") + n = 60_000 data = pd.DataFrame( { @@ -220,7 +225,6 @@ def pt(): } ) - con = ibis.duckdb.connect() return con.create_table("df", data) @@ -292,13 +296,6 @@ def high_card_window(t): return ibis.window(group_by=t.key) -broken_pandas_grouped_rolling = pytest.mark.xfail( - condition=vparse("1.4") <= vparse(pd.__version__) < vparse("1.4.2"), - raises=ValueError, - reason="https://github.com/pandas-dev/pandas/pull/44068", -) - - @pytest.mark.benchmark(group="execution") @pytest.mark.parametrize( "expression_fn", @@ -311,16 +308,8 @@ def high_card_window(t): pytest.param(simple_sort_projection, id="simple_sort_projection"), pytest.param(multikey_sort, id="multikey_sort"), pytest.param(multikey_sort_projection, id="multikey_sort_projection"), - pytest.param( - low_card_grouped_rolling, - id="low_card_grouped_rolling", - marks=[broken_pandas_grouped_rolling], - ), - pytest.param( - high_card_grouped_rolling, - id="high_card_grouped_rolling", - marks=[broken_pandas_grouped_rolling], - ), + pytest.param(low_card_grouped_rolling, id="low_card_grouped_rolling"), + pytest.param(high_card_grouped_rolling, id="high_card_grouped_rolling"), ], ) def test_execute(benchmark, expression_fn, pt): @@ -627,7 +616,7 @@ def test_compile_with_drops( .join(products, "sku") .drop("customerid", "qty", "total", "items") .drop("dims_cm", "cost") - .mutate(o_date=lambda t: t.shipped.date()) + .mutate(o_date=lambda t: t.shipped) .filter(lambda t: t.ordered == t.shipped) ) @@ -730,6 +719,7 @@ def test_duckdb_to_pyarrow(benchmark, sql, ddb) -> None: # yes, we're benchmarking duckdb here, not ibis # # we do this to get a baseline for comparison + pytest.importorskip("pyarrow") duckdb = pytest.importorskip("duckdb") con = duckdb.connect(ddb, read_only=True) @@ -737,6 +727,7 @@ def test_duckdb_to_pyarrow(benchmark, sql, ddb) -> None: def test_ibis_duckdb_to_pyarrow(benchmark, sql, ddb) -> None: + pytest.importorskip("pyarrow") pytest.importorskip("duckdb") con = ibis.duckdb.connect(ddb, read_only=True) @@ -820,6 +811,8 @@ def test_big_join_compile(benchmark, src, diff): @pytest.mark.timeout(5) def test_big_expression_compile(benchmark): + pytest.importorskip("duckdb") + from ibis.tests.benchmarks.benchfuncs import clean_names t = ibis.table( @@ -846,7 +839,7 @@ def many_cols(): @pytest.mark.parametrize( "getter", - [itemgetter("x0"), itemgetter(0), attrgetter("x0")], + [lambda t: t["x0"], lambda t: t[0], lambda t: t.x0], ids=["str", "int", "attr"], ) def test_column_access(benchmark, many_cols, getter): @@ -868,8 +861,10 @@ def test_large_union_construct(benchmark, many_tables): @pytest.mark.timeout(180) def test_large_union_compile(benchmark, many_tables): + pytest.importorskip("duckdb") + expr = ibis.union(*many_tables) - assert benchmark(ibis.to_sql, expr) is not None + assert benchmark(ibis.to_sql, expr, dialect="duckdb") is not None @pytest.fixture(scope="session") @@ -914,6 +909,8 @@ def test_wide_drop_construct(benchmark, wide_table, cols_to_drop): def test_wide_drop_compile(benchmark, wide_table, cols_to_drop): + pytest.importorskip("duckdb") + benchmark( lambda expr: ibis.to_sql(expr, dialect="duckdb"), wide_table.drop(*cols_to_drop) ) @@ -958,14 +955,11 @@ def test_wide_relocate(benchmark, input, column, relative, cols): benchmark(t.relocate, column.format(last), **{input: relative.format(last)}) -def test_duckdb_timestamp_conversion(benchmark): - pytest.importorskip("duckdb") - - start = datetime(2000, 1, 1, tzinfo=pytz.UTC) - stop = datetime(2000, 2, 1, tzinfo=pytz.UTC) +def test_duckdb_timestamp_conversion(benchmark, con): + start = datetime.datetime(2000, 1, 1, tzinfo=pytz.UTC) + stop = datetime.datetime(2000, 2, 1, tzinfo=pytz.UTC) expr = ibis.range(start, stop, ibis.interval(seconds=1)).unnest() - con = ibis.duckdb.connect() series = benchmark(con.execute, expr) assert series.size == (stop - start).total_seconds()