Skip to content

Commit

Permalink
test(python): Use assert_frame_equal instead of `assert df.frame_equal(...)` (pola-rs#6553)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored and vincent committed Jan 30, 2023
1 parent 8ae47e5 commit 0176adb
Show file tree
Hide file tree
Showing 36 changed files with 502 additions and 405 deletions.
4 changes: 2 additions & 2 deletions py-polars/polars/testing/asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,6 @@ def assert_frame_equal_local_categoricals(
raise AssertionError

cat_to_str = pli.col(Categorical).cast(str)
assert df_a.with_columns(cat_to_str).frame_equal(df_b.with_columns(cat_to_str))
assert_frame_equal(df_a.with_columns(cat_to_str), df_b.with_columns(cat_to_str))
cat_to_phys = pli.col(Categorical).to_physical()
assert df_a.with_columns(cat_to_phys).frame_equal(df_b.with_columns(cat_to_phys))
assert_frame_equal(df_a.with_columns(cat_to_phys), df_b.with_columns(cat_to_phys))
3 changes: 2 additions & 1 deletion py-polars/tests/db-benchmark/various.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np

import polars as pl
from polars.testing import assert_frame_equal

# https://github.com/pola-rs/polars/issues/1942
t0 = time.time()
Expand Down Expand Up @@ -84,7 +85,7 @@ def test_cross_join() -> None:
df2 = pl.DataFrame({"frame2": pl.arange(0, 100, eager=True)})
out = df2.join(df1, how="cross")
df2 = pl.DataFrame({"frame2": pl.arange(0, 101, eager=True)})
assert df2.join(df1, how="cross").slice(0, 100).frame_equal(out)
assert_frame_equal(df2.join(df1, how="cross").slice(0, 100), out)


def test_cross_join_slice_pushdown() -> None:
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/slow/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,4 @@ def test_parquet_chunks_545() -> None:

# read it with polars
polars_df = pl.read_parquet(f)
assert pl.DataFrame(df).frame_equal(polars_df)
assert_frame_equal(pl.DataFrame(df), polars_df)
9 changes: 5 additions & 4 deletions py-polars/tests/unit/io/test_avro.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pytest

import polars as pl
from polars.testing import assert_frame_equal

if TYPE_CHECKING:
from polars.internals.type_aliases import AvroCompression
Expand All @@ -27,7 +28,7 @@ def test_from_to_buffer(example_df: pl.DataFrame, compression: AvroCompression)
buf.seek(0)

read_df = pl.read_avro(buf)
assert example_df.frame_equal(read_df)
assert_frame_equal(example_df, read_df)


@pytest.mark.parametrize("compression", COMPRESSIONS)
Expand All @@ -37,7 +38,7 @@ def test_from_to_file(example_df: pl.DataFrame, compression: AvroCompression) ->
example_df.write_avro(file_path, compression=compression)
df_read = pl.read_avro(file_path)

assert example_df.frame_equal(df_read)
assert_frame_equal(example_df, df_read)


def test_select_columns() -> None:
Expand All @@ -49,7 +50,7 @@ def test_select_columns() -> None:
f.seek(0)

read_df = pl.read_avro(f, columns=["b", "c"])
assert expected.frame_equal(read_df)
assert_frame_equal(expected, read_df)


def test_select_projection() -> None:
Expand All @@ -61,4 +62,4 @@ def test_select_projection() -> None:
f.seek(0)

read_df = pl.read_avro(f, columns=[1, 2])
assert expected.frame_equal(read_df)
assert_frame_equal(expected, read_df)
30 changes: 15 additions & 15 deletions py-polars/tests/unit/io/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,31 +440,31 @@ def test_compressed_csv(io_files_path: Path) -> None:
expected = pl.DataFrame(
{"a": [1, 2, 3], "b": ["a", "b", "c"], "c": [1.0, 2.0, 3.0]}
)
assert out.frame_equal(expected)
assert_frame_equal(out, expected)

# now from disk
csv_file = io_files_path / "gzipped.csv"
out = pl.read_csv(str(csv_file))
assert out.frame_equal(expected)
assert_frame_equal(out, expected)

# now with column projection
out = pl.read_csv(csv_bytes, columns=["a", "b"])
expected = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
assert out.frame_equal(expected)
assert_frame_equal(out, expected)

# zlib compression
csv_bytes = zlib.compress(csv.encode())
out = pl.read_csv(csv_bytes)
expected = pl.DataFrame(
{"a": [1, 2, 3], "b": ["a", "b", "c"], "c": [1.0, 2.0, 3.0]}
)
assert out.frame_equal(expected)
assert_frame_equal(out, expected)

# no compression
f2 = io.BytesIO(b"a,b\n1,2\n")
out2 = pl.read_csv(f2)
expected = pl.DataFrame({"a": [1], "b": [2]})
assert out2.frame_equal(expected)
assert_frame_equal(out2, expected)


def test_partial_decompression(foods_file_path: Path) -> None:
Expand Down Expand Up @@ -540,7 +540,7 @@ def test_csv_quote_char() -> None:
rolling_stones.encode(), quote_char=None, use_pyarrow=use_pyarrow
)
assert out.shape == (9, 3)
out.frame_equal(expected)
assert_frame_equal(out, expected)


def test_csv_empty_quotes_char_1622() -> None:
Expand Down Expand Up @@ -591,10 +591,10 @@ def test_csv_date_handling() -> None:
}
)
out = pl.read_csv(csv.encode(), parse_dates=True)
assert out.frame_equal(expected, null_equal=True)
assert_frame_equal(out, expected)
dtypes = {"date": pl.Date}
out = pl.read_csv(csv.encode(), dtypes=dtypes)
assert out.frame_equal(expected, null_equal=True)
assert_frame_equal(out, expected)


def test_csv_globbing(io_files_path: Path) -> None:
Expand Down Expand Up @@ -665,7 +665,7 @@ def test_empty_string_missing_round_trip() -> None:
df.write_csv(f, null_value=null)
f.seek(0)
df_read = pl.read_csv(f, null_values=null)
assert df.frame_equal(df_read)
assert_frame_equal(df, df_read)


def test_write_csv_delimiter() -> None:
Expand Down Expand Up @@ -710,7 +710,7 @@ def test_quoting_round_trip() -> None:
f.seek(0)
read_df = pl.read_csv(f)

assert read_df.frame_equal(df)
assert_frame_equal(read_df, df)


def test_fallback_chrono_parser() -> None:
Expand All @@ -732,7 +732,7 @@ def test_csv_string_escaping() -> None:
df.write_csv(f)
f.seek(0)
df_read = pl.read_csv(f)
assert df_read.frame_equal(df)
assert_frame_equal(df_read, df)


def test_glob_csv(df_no_lists: pl.DataFrame) -> None:
Expand Down Expand Up @@ -790,16 +790,16 @@ def test_csv_multiple_null_values() -> None:
}
)

assert df2.frame_equal(expected)
assert_frame_equal(df2, expected)


def test_different_eol_char() -> None:
csv = "a,1,10;b,2,20;c,3,30"
expected = pl.DataFrame(
{"column_1": ["a", "b", "c"], "column_2": [1, 2, 3], "column_3": [10, 20, 30]}
)
assert pl.read_csv(csv.encode(), eol_char=";", has_header=False).frame_equal(
expected
assert_frame_equal(
pl.read_csv(csv.encode(), eol_char=";", has_header=False), expected
)


Expand All @@ -809,7 +809,7 @@ def test_csv_write_escape_newlines() -> None:
df.write_csv(f)
f.seek(0)
read_df = pl.read_csv(f)
assert df.frame_equal(read_df)
assert_frame_equal(df, read_df)


def test_skip_new_line_embedded_lines() -> None:
Expand Down
8 changes: 4 additions & 4 deletions py-polars/tests/unit/io/test_delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest

import polars as pl
from polars.testing import assert_frame_equal
from polars.testing import assert_frame_equal, assert_frame_not_equal


@pytest.fixture()
Expand All @@ -23,7 +23,7 @@ def test_scan_delta_version(delta_table_path: Path) -> None:
df1 = pl.scan_delta(str(delta_table_path), version=0).collect()
df2 = pl.scan_delta(str(delta_table_path), version=1).collect()

assert not df1.frame_equal(df2)
assert_frame_not_equal(df1, df2)


def test_scan_delta_columns(delta_table_path: Path) -> None:
Expand All @@ -50,7 +50,7 @@ def test_scan_delta_relative(delta_table_path: Path) -> None:
assert_frame_equal(expected, ldf.collect(), check_dtype=False)

ldf = pl.scan_delta(rel_delta_table_path, version=1)
assert not expected.frame_equal(ldf.collect())
assert_frame_not_equal(expected, ldf.collect())


def test_read_delta(delta_table_path: Path) -> None:
Expand All @@ -64,7 +64,7 @@ def test_read_delta_version(delta_table_path: Path) -> None:
df1 = pl.read_delta(str(delta_table_path), version=0)
df2 = pl.read_delta(str(delta_table_path), version=1)

assert not df1.frame_equal(df2)
assert_frame_not_equal(df1, df2)


def test_read_delta_columns(delta_table_path: Path) -> None:
Expand Down
8 changes: 4 additions & 4 deletions py-polars/tests/unit/io/test_ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import pytest

import polars as pl
from polars.testing import assert_frame_equal_local_categoricals
from polars.testing import assert_frame_equal, assert_frame_equal_local_categoricals

if TYPE_CHECKING:
from polars.internals.type_aliases import IpcCompression
Expand Down Expand Up @@ -61,7 +61,7 @@ def test_select_columns_from_buffer() -> None:
f.seek(0)

read_df = pl.read_ipc(f, columns=["b", "c"], use_pyarrow=False)
assert expected.frame_equal(read_df)
assert_frame_equal(expected, read_df)


def test_select_columns_projection() -> None:
Expand All @@ -73,7 +73,7 @@ def test_select_columns_projection() -> None:
f.seek(0)

read_df = pl.read_ipc(f, columns=[1, 2], use_pyarrow=False)
assert expected.frame_equal(read_df)
assert_frame_equal(expected, read_df)


@pytest.mark.parametrize("compression", COMPRESSIONS)
Expand All @@ -85,7 +85,7 @@ def test_compressed_simple(compression: IpcCompression) -> None:
f.seek(0)

df_read = pl.read_ipc(f, use_pyarrow=False)
assert df_read.frame_equal(df)
assert_frame_equal(df_read, df)


@pytest.mark.parametrize("compression", COMPRESSIONS)
Expand Down
5 changes: 3 additions & 2 deletions py-polars/tests/unit/io/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
from typing import cast

import polars as pl
from polars.testing import assert_frame_equal


def test_copy() -> None:
df = pl.DataFrame({"a": [1, 2], "b": ["a", None], "c": [True, False]})
assert copy.copy(df).frame_equal(df, True)
assert copy.deepcopy(df).frame_equal(df, True)
assert_frame_equal(copy.copy(df), df)
assert_frame_equal(copy.deepcopy(df), df)

a = pl.Series("a", [1, 2])
assert copy.copy(a).series_equal(a, True)
Expand Down
20 changes: 10 additions & 10 deletions py-polars/tests/unit/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def test_select_columns() -> None:
f.seek(0)

read_df = pl.read_parquet(f, columns=["b", "c"], use_pyarrow=False)
assert expected.frame_equal(read_df)
assert_frame_equal(expected, read_df)


def test_select_projection() -> None:
Expand All @@ -114,7 +114,7 @@ def test_select_projection() -> None:
f.seek(0)

read_df = pl.read_parquet(f, columns=[1, 2], use_pyarrow=False)
assert expected.frame_equal(read_df)
assert_frame_equal(expected, read_df)


@pytest.mark.parametrize("compression", COMPRESSIONS)
Expand All @@ -139,7 +139,7 @@ def test_parquet_datetime(compression: ParquetCompression, use_pyarrow: bool) ->
df.write_parquet(f, use_pyarrow=use_pyarrow, compression=compression)
f.seek(0)
read = pl.read_parquet(f)
assert read.frame_equal(df)
assert_frame_equal(read, df)


def test_nested_parquet() -> None:
Expand Down Expand Up @@ -198,7 +198,7 @@ def test_chunked_round_trip() -> None:
f = io.BytesIO()
df.write_parquet(f)
f.seek(0)
assert pl.read_parquet(f).frame_equal(df)
assert_frame_equal(pl.read_parquet(f), df)


def test_lazy_self_join_file_cache_prop_3979(df: pl.DataFrame) -> None:
Expand Down Expand Up @@ -238,7 +238,7 @@ def test_nested_dictionary() -> None:
f.seek(0)

read_df = pl.read_parquet(f)
assert df.frame_equal(read_df)
assert_frame_equal(df, read_df)


def test_row_group_size_saturation() -> None:
Expand All @@ -248,7 +248,7 @@ def test_row_group_size_saturation() -> None:
# request larger chunk than rows in df
df.write_parquet(f, row_group_size=1024)
f.seek(0)
assert pl.read_parquet(f).frame_equal(df)
assert_frame_equal(pl.read_parquet(f), df)


def test_nested_sliced() -> None:
Expand All @@ -262,7 +262,7 @@ def test_nested_sliced() -> None:
f = io.BytesIO()
df.write_parquet(f)
f.seek(0)
assert pl.read_parquet(f).frame_equal(df)
assert_frame_equal(pl.read_parquet(f), df)


def test_parquet_5795() -> None:
Expand Down Expand Up @@ -295,7 +295,7 @@ def test_parquet_5795() -> None:
f = io.BytesIO()
df_pd.to_parquet(f)
f.seek(0)
assert pl.read_parquet(f).frame_equal(pl.from_pandas(df_pd))
assert_frame_equal(pl.read_parquet(f), pl.from_pandas(df_pd))


@typing.no_type_check
Expand All @@ -322,7 +322,7 @@ def test_parquet_nesting_structs_list() -> None:
df.write_parquet(f)
f.seek(0)

assert pl.read_parquet(f).frame_equal(df)
assert_frame_equal(pl.read_parquet(f), df)


@typing.no_type_check
Expand All @@ -346,7 +346,7 @@ def test_parquet_nested_dictionaries_6217() -> None:
pq.write_table(table, f, compression="snappy")
f.seek(0)
read = pl.read_parquet(f)
assert read.frame_equal(df)
assert_frame_equal(read, df)


@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
Expand Down
3 changes: 2 additions & 1 deletion py-polars/tests/unit/io/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pickle

import polars as pl
from polars.testing import assert_frame_equal


def test_pickle() -> None:
Expand All @@ -14,7 +15,7 @@ def test_pickle() -> None:
df = pl.DataFrame({"a": [1, 2], "b": ["a", None], "c": [True, False]})
b = pickle.dumps(df)
out = pickle.loads(b)
assert df.frame_equal(out, null_equal=True)
assert_frame_equal(df, out)


def test_pickle_expr() -> None:
Expand Down
3 changes: 2 additions & 1 deletion py-polars/tests/unit/io/test_pyarrow_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pytest

import polars as pl
from polars.testing import assert_frame_equal


@typing.no_type_check
Expand All @@ -17,7 +18,7 @@ def helper_dataset_test(file_path: Path, query) -> None:

expected = query(pl.scan_ipc(file_path))
out = query(pl.scan_ds(dset))
assert out.frame_equal(expected)
assert_frame_equal(out, expected)


@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
Expand Down
Loading

0 comments on commit 0176adb

Please sign in to comment.