test(python): Use assert_frame_equal instead of assert df.frame_equal(...) #6553

Merged: 11 commits, Jan 30, 2023
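The conversion follows one mechanical pattern across the test suite: bare
assert df.frame_equal(other) checks become assert_frame_equal(df, other) from
polars.testing, and negated checks become assert_frame_not_equal(a, b). A
minimal before/after sketch of the pattern, with two illustrative frames:

    import polars as pl
    from polars.testing import assert_frame_equal

    df_result = pl.DataFrame({"a": [1, 2, 3]})
    df_expected = pl.DataFrame({"a": [1, 2, 3]})

    # Before: a failing bare assert reports only "AssertionError".
    assert df_result.frame_equal(df_expected)

    # After: assert_frame_equal raises an AssertionError that describes the
    # mismatch (schema, dtypes, or values), so test failures are easier to read.
    assert_frame_equal(df_result, df_expected)

Note that the explicit null_equal=True arguments in some of the old calls are
dropped without replacement, relying on assert_frame_equal treating null
values as equal by default.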
4 changes: 2 additions & 2 deletions py-polars/polars/testing/asserts.py
@@ -403,6 +403,6 @@ def assert_frame_equal_local_categoricals(
raise AssertionError

cat_to_str = pli.col(Categorical).cast(str)
-assert df_a.with_columns(cat_to_str).frame_equal(df_b.with_columns(cat_to_str))
+assert_frame_equal(df_a.with_columns(cat_to_str), df_b.with_columns(cat_to_str))
cat_to_phys = pli.col(Categorical).to_physical()
-assert df_a.with_columns(cat_to_phys).frame_equal(df_b.with_columns(cat_to_phys))
+assert_frame_equal(df_a.with_columns(cat_to_phys), df_b.with_columns(cat_to_phys))
3 changes: 2 additions & 1 deletion py-polars/tests/db-benchmark/various.py
@@ -6,6 +6,7 @@
import numpy as np

import polars as pl
+from polars.testing import assert_frame_equal

# https://github.com/pola-rs/polars/issues/1942
t0 = time.time()
@@ -84,7 +85,7 @@ def test_cross_join() -> None:
df2 = pl.DataFrame({"frame2": pl.arange(0, 100, eager=True)})
out = df2.join(df1, how="cross")
df2 = pl.DataFrame({"frame2": pl.arange(0, 101, eager=True)})
-assert df2.join(df1, how="cross").slice(0, 100).frame_equal(out)
+assert_frame_equal(df2.join(df1, how="cross").slice(0, 100), out)


def test_cross_join_slice_pushdown() -> None:
2 changes: 1 addition & 1 deletion py-polars/tests/slow/test_parquet.py
@@ -47,4 +47,4 @@ def test_parquet_chunks_545() -> None:

# read it with polars
polars_df = pl.read_parquet(f)
-assert pl.DataFrame(df).frame_equal(polars_df)
+assert_frame_equal(pl.DataFrame(df), polars_df)
9 changes: 5 additions & 4 deletions py-polars/tests/unit/io/test_avro.py
@@ -8,6 +8,7 @@
import pytest

import polars as pl
+from polars.testing import assert_frame_equal

if TYPE_CHECKING:
from polars.internals.type_aliases import AvroCompression
@@ -27,7 +28,7 @@ def test_from_to_buffer(example_df: pl.DataFrame, compression: AvroCompression)
buf.seek(0)

read_df = pl.read_avro(buf)
-assert example_df.frame_equal(read_df)
+assert_frame_equal(example_df, read_df)


@pytest.mark.parametrize("compression", COMPRESSIONS)
@@ -37,7 +38,7 @@ def test_from_to_file(example_df: pl.DataFrame, compression: AvroCompression) ->
example_df.write_avro(file_path, compression=compression)
df_read = pl.read_avro(file_path)

-assert example_df.frame_equal(df_read)
+assert_frame_equal(example_df, df_read)


def test_select_columns() -> None:
@@ -49,7 +50,7 @@ def test_select_columns() -> None:
f.seek(0)

read_df = pl.read_avro(f, columns=["b", "c"])
-assert expected.frame_equal(read_df)
+assert_frame_equal(expected, read_df)


def test_select_projection() -> None:
@@ -61,4 +62,4 @@ def test_select_projection() -> None:
f.seek(0)

read_df = pl.read_avro(f, columns=[1, 2])
-assert expected.frame_equal(read_df)
+assert_frame_equal(expected, read_df)
30 changes: 15 additions & 15 deletions py-polars/tests/unit/io/test_csv.py
@@ -440,31 +440,31 @@ def test_compressed_csv(io_files_path: Path) -> None:
expected = pl.DataFrame(
{"a": [1, 2, 3], "b": ["a", "b", "c"], "c": [1.0, 2.0, 3.0]}
)
-assert out.frame_equal(expected)
+assert_frame_equal(out, expected)

# now from disk
csv_file = io_files_path / "gzipped.csv"
out = pl.read_csv(str(csv_file))
-assert out.frame_equal(expected)
+assert_frame_equal(out, expected)

# now with column projection
out = pl.read_csv(csv_bytes, columns=["a", "b"])
expected = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
-assert out.frame_equal(expected)
+assert_frame_equal(out, expected)

# zlib compression
csv_bytes = zlib.compress(csv.encode())
out = pl.read_csv(csv_bytes)
expected = pl.DataFrame(
{"a": [1, 2, 3], "b": ["a", "b", "c"], "c": [1.0, 2.0, 3.0]}
)
-assert out.frame_equal(expected)
+assert_frame_equal(out, expected)

# no compression
f2 = io.BytesIO(b"a,b\n1,2\n")
out2 = pl.read_csv(f2)
expected = pl.DataFrame({"a": [1], "b": [2]})
-assert out2.frame_equal(expected)
+assert_frame_equal(out2, expected)


def test_partial_decompression(foods_file_path: Path) -> None:
@@ -540,7 +540,7 @@ def test_csv_quote_char() -> None:
rolling_stones.encode(), quote_char=None, use_pyarrow=use_pyarrow
)
assert out.shape == (9, 3)
-out.frame_equal(expected)
+assert_frame_equal(out, expected)


def test_csv_empty_quotes_char_1622() -> None:
@@ -591,10 +591,10 @@ def test_csv_date_handling() -> None:
}
)
out = pl.read_csv(csv.encode(), parse_dates=True)
-assert out.frame_equal(expected, null_equal=True)
+assert_frame_equal(out, expected)
dtypes = {"date": pl.Date}
out = pl.read_csv(csv.encode(), dtypes=dtypes)
-assert out.frame_equal(expected, null_equal=True)
+assert_frame_equal(out, expected)


def test_csv_globbing(io_files_path: Path) -> None:
@@ -665,7 +665,7 @@ def test_empty_string_missing_round_trip() -> None:
df.write_csv(f, null_value=null)
f.seek(0)
df_read = pl.read_csv(f, null_values=null)
-assert df.frame_equal(df_read)
+assert_frame_equal(df, df_read)


def test_write_csv_delimiter() -> None:
@@ -710,7 +710,7 @@ def test_quoting_round_trip() -> None:
f.seek(0)
read_df = pl.read_csv(f)

-assert read_df.frame_equal(df)
+assert_frame_equal(read_df, df)


def test_fallback_chrono_parser() -> None:
@@ -732,7 +732,7 @@ def test_csv_string_escaping() -> None:
df.write_csv(f)
f.seek(0)
df_read = pl.read_csv(f)
-assert df_read.frame_equal(df)
+assert_frame_equal(df_read, df)


def test_glob_csv(df_no_lists: pl.DataFrame) -> None:
@@ -790,16 +790,16 @@ def test_csv_multiple_null_values() -> None:
}
)

-assert df2.frame_equal(expected)
+assert_frame_equal(df2, expected)


def test_different_eol_char() -> None:
csv = "a,1,10;b,2,20;c,3,30"
expected = pl.DataFrame(
{"column_1": ["a", "b", "c"], "column_2": [1, 2, 3], "column_3": [10, 20, 30]}
)
-assert pl.read_csv(csv.encode(), eol_char=";", has_header=False).frame_equal(
-    expected
+assert_frame_equal(
+    pl.read_csv(csv.encode(), eol_char=";", has_header=False), expected
)


@@ -809,7 +809,7 @@ def test_csv_write_escape_newlines() -> None:
df.write_csv(f)
f.seek(0)
read_df = pl.read_csv(f)
-assert df.frame_equal(read_df)
+assert_frame_equal(df, read_df)


def test_skip_new_line_embedded_lines() -> None:
8 changes: 4 additions & 4 deletions py-polars/tests/unit/io/test_delta.py
@@ -4,7 +4,7 @@
import pytest

import polars as pl
-from polars.testing import assert_frame_equal
+from polars.testing import assert_frame_equal, assert_frame_not_equal


@pytest.fixture()
@@ -23,7 +23,7 @@ def test_scan_delta_version(delta_table_path: Path) -> None:
df1 = pl.scan_delta(str(delta_table_path), version=0).collect()
df2 = pl.scan_delta(str(delta_table_path), version=1).collect()

-assert not df1.frame_equal(df2)
+assert_frame_not_equal(df1, df2)


def test_scan_delta_columns(delta_table_path: Path) -> None:
@@ -50,7 +50,7 @@ def test_scan_delta_relative(delta_table_path: Path) -> None:
assert_frame_equal(expected, ldf.collect(), check_dtype=False)

ldf = pl.scan_delta(rel_delta_table_path, version=1)
-assert not expected.frame_equal(ldf.collect())
+assert_frame_not_equal(expected, ldf.collect())


def test_read_delta(delta_table_path: Path) -> None:
@@ -64,7 +64,7 @@ def test_read_delta_version(delta_table_path: Path) -> None:
df1 = pl.read_delta(str(delta_table_path), version=0)
df2 = pl.read_delta(str(delta_table_path), version=1)

-assert not df1.frame_equal(df2)
+assert_frame_not_equal(df1, df2)


def test_read_delta_columns(delta_table_path: Path) -> None:
8 changes: 4 additions & 4 deletions py-polars/tests/unit/io/test_ipc.py
@@ -10,7 +10,7 @@
import pytest

import polars as pl
-from polars.testing import assert_frame_equal_local_categoricals
+from polars.testing import assert_frame_equal, assert_frame_equal_local_categoricals

if TYPE_CHECKING:
from polars.internals.type_aliases import IpcCompression
@@ -61,7 +61,7 @@ def test_select_columns_from_buffer() -> None:
f.seek(0)

read_df = pl.read_ipc(f, columns=["b", "c"], use_pyarrow=False)
-assert expected.frame_equal(read_df)
+assert_frame_equal(expected, read_df)


def test_select_columns_projection() -> None:
@@ -73,7 +73,7 @@ def test_select_columns_projection() -> None:
f.seek(0)

read_df = pl.read_ipc(f, columns=[1, 2], use_pyarrow=False)
-assert expected.frame_equal(read_df)
+assert_frame_equal(expected, read_df)


@pytest.mark.parametrize("compression", COMPRESSIONS)
@@ -85,7 +85,7 @@ def test_compressed_simple(compression: IpcCompression) -> None:
f.seek(0)

df_read = pl.read_ipc(f, use_pyarrow=False)
-assert df_read.frame_equal(df)
+assert_frame_equal(df_read, df)


@pytest.mark.parametrize("compression", COMPRESSIONS)
5 changes: 3 additions & 2 deletions py-polars/tests/unit/io/test_other.py
@@ -4,12 +4,13 @@
from typing import cast

import polars as pl
+from polars.testing import assert_frame_equal


def test_copy() -> None:
df = pl.DataFrame({"a": [1, 2], "b": ["a", None], "c": [True, False]})
-assert copy.copy(df).frame_equal(df, True)
-assert copy.deepcopy(df).frame_equal(df, True)
+assert_frame_equal(copy.copy(df), df)
+assert_frame_equal(copy.deepcopy(df), df)

a = pl.Series("a", [1, 2])
assert copy.copy(a).series_equal(a, True)
20 changes: 10 additions & 10 deletions py-polars/tests/unit/io/test_parquet.py
@@ -103,7 +103,7 @@ def test_select_columns() -> None:
f.seek(0)

read_df = pl.read_parquet(f, columns=["b", "c"], use_pyarrow=False)
-assert expected.frame_equal(read_df)
+assert_frame_equal(expected, read_df)


def test_select_projection() -> None:
@@ -114,7 +114,7 @@ def test_select_projection() -> None:
f.seek(0)

read_df = pl.read_parquet(f, columns=[1, 2], use_pyarrow=False)
-assert expected.frame_equal(read_df)
+assert_frame_equal(expected, read_df)


@pytest.mark.parametrize("compression", COMPRESSIONS)
@@ -139,7 +139,7 @@ def test_parquet_datetime(compression: ParquetCompression, use_pyarrow: bool) ->
df.write_parquet(f, use_pyarrow=use_pyarrow, compression=compression)
f.seek(0)
read = pl.read_parquet(f)
-assert read.frame_equal(df)
+assert_frame_equal(read, df)


def test_nested_parquet() -> None:
@@ -198,7 +198,7 @@ def test_chunked_round_trip() -> None:
f = io.BytesIO()
df.write_parquet(f)
f.seek(0)
-assert pl.read_parquet(f).frame_equal(df)
+assert_frame_equal(pl.read_parquet(f), df)


def test_lazy_self_join_file_cache_prop_3979(df: pl.DataFrame) -> None:
Expand Down Expand Up @@ -238,7 +238,7 @@ def test_nested_dictionary() -> None:
f.seek(0)

read_df = pl.read_parquet(f)
-assert df.frame_equal(read_df)
+assert_frame_equal(df, read_df)


def test_row_group_size_saturation() -> None:
Expand All @@ -248,7 +248,7 @@ def test_row_group_size_saturation() -> None:
# request larger chunk than rows in df
df.write_parquet(f, row_group_size=1024)
f.seek(0)
-assert pl.read_parquet(f).frame_equal(df)
+assert_frame_equal(pl.read_parquet(f), df)


def test_nested_sliced() -> None:
Expand All @@ -262,7 +262,7 @@ def test_nested_sliced() -> None:
f = io.BytesIO()
df.write_parquet(f)
f.seek(0)
-assert pl.read_parquet(f).frame_equal(df)
+assert_frame_equal(pl.read_parquet(f), df)


def test_parquet_5795() -> None:
Expand Down Expand Up @@ -295,7 +295,7 @@ def test_parquet_5795() -> None:
f = io.BytesIO()
df_pd.to_parquet(f)
f.seek(0)
-assert pl.read_parquet(f).frame_equal(pl.from_pandas(df_pd))
+assert_frame_equal(pl.read_parquet(f), pl.from_pandas(df_pd))


@typing.no_type_check
@@ -322,7 +322,7 @@ def test_parquet_nesting_structs_list() -> None:
df.write_parquet(f)
f.seek(0)

-assert pl.read_parquet(f).frame_equal(df)
+assert_frame_equal(pl.read_parquet(f), df)


@typing.no_type_check
@@ -346,7 +346,7 @@ def test_parquet_nested_dictionaries_6217() -> None:
pq.write_table(table, f, compression="snappy")
f.seek(0)
read = pl.read_parquet(f)
-assert read.frame_equal(df)
+assert_frame_equal(read, df)


@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
3 changes: 2 additions & 1 deletion py-polars/tests/unit/io/test_pickle.py
@@ -4,6 +4,7 @@
import pickle

import polars as pl
+from polars.testing import assert_frame_equal


def test_pickle() -> None:
@@ -14,7 +15,7 @@ def test_pickle() -> None:
df = pl.DataFrame({"a": [1, 2], "b": ["a", None], "c": [True, False]})
b = pickle.dumps(df)
out = pickle.loads(b)
-assert df.frame_equal(out, null_equal=True)
+assert_frame_equal(df, out)


def test_pickle_expr() -> None:
3 changes: 2 additions & 1 deletion py-polars/tests/unit/io/test_pyarrow_dataset.py
@@ -9,6 +9,7 @@
import pytest

import polars as pl
+from polars.testing import assert_frame_equal


@typing.no_type_check
@@ -17,7 +18,7 @@ def helper_dataset_test(file_path: Path, query) -> None:

expected = query(pl.scan_ipc(file_path))
out = query(pl.scan_ds(dset))
-assert out.frame_equal(expected)
+assert_frame_equal(out, expected)


@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")