Skip to content

Commit

Permalink
test(python): More test restructure (#6961)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Feb 17, 2023
1 parent 01126f2 commit cec13c3
Show file tree
Hide file tree
Showing 33 changed files with 498 additions and 460 deletions.
1 change: 1 addition & 0 deletions py-polars/tests/unit/datatypes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Test module for testing behaviour of specific data types in various operations."""
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -61,61 +61,6 @@ def test_read_csv_categorical() -> None:
assert df["col1"].dtype == pl.Categorical


def test_categorical_lexical_sort() -> None:
    """Check sorting of a lexically-ordered categorical column.

    Covers sorting on the categorical alone, on the categorical followed by
    an integer column, and on the integer column followed by the categorical.
    """
    lexical_cats = pl.col("cats").cast(pl.Categorical).cat.set_ordering("lexical")
    # Results are compared as strings, so cast the categorical back to Utf8.
    cats_as_utf8 = pl.col("cats").cast(pl.Utf8)

    df = pl.DataFrame(
        {"cats": ["z", "z", "k", "a", "b"], "vals": [3, 1, 2, 2, 3]}
    ).with_columns([lexical_cats])

    # Single-key sort: the dtype must stay categorical.
    by_cats = df.sort(["cats"])
    assert by_cats["cats"].dtype == pl.Categorical
    assert_frame_equal(
        by_cats.with_columns(cats_as_utf8),
        pl.DataFrame({"cats": ["a", "b", "k", "z", "z"], "vals": [2, 3, 2, 3, 1]}),
    )

    # Categorical first, integer column as tie-breaker.
    by_cats_then_vals = df.sort(["cats", "vals"])
    assert_frame_equal(
        by_cats_then_vals.with_columns(cats_as_utf8),
        pl.DataFrame({"cats": ["a", "b", "k", "z", "z"], "vals": [2, 3, 2, 1, 3]}),
    )

    # Integer column first, categorical as tie-breaker.
    by_vals_then_cats = df.sort(["vals", "cats"])
    assert_frame_equal(
        by_vals_then_cats.with_columns(cats_as_utf8),
        pl.DataFrame({"cats": ["z", "a", "k", "b", "z"], "vals": [1, 2, 2, 3, 3]}),
    )


def test_categorical_lexical_ordering_after_concat() -> None:
    """Sort a concatenation of two lazy frames with lexical categoricals.

    The sorted result is not inspected; the test only requires that the
    pipeline runs to completion.
    """
    with pl.StringCache():
        first = pl.DataFrame(
            [pl.Series("key1", [8, 5]), pl.Series("key2", ["fox", "baz"])]
        )
        second = pl.DataFrame(
            [pl.Series("key1", [6, 8, 6]), pl.Series("key2", ["fox", "foo", "bar"])]
        )

        ldf1 = first.lazy().with_columns(
            pl.col("key2").cast(pl.Categorical).cat.set_ordering("lexical")
        )
        ldf2 = second.lazy().with_columns(
            pl.col("key2").cast(pl.Categorical).cat.set_ordering("lexical")
        )

        # The ordering is set again on the concatenated column before collecting.
        combined = pl.concat([ldf1, ldf2]).with_columns(
            pl.col("key2").cat.set_ordering("lexical")
        )
        df = combined.collect()

        # NOTE(review): no assertion on the output; presumably a
        # does-not-raise regression check. Confirm against the original issue.
        df.sort(["key1", "key2"])


def test_cat_to_dummies() -> None:
df = pl.DataFrame({"foo": [1, 2, 3, 4], "bar": ["a", "b", "a", "c"]})
df = df.with_columns(pl.col("bar").cast(pl.Categorical))
Expand Down Expand Up @@ -295,23 +240,6 @@ def test_categorical_in_struct_nulls() -> None:
assert s[2] == {"job": "waiter", "counts": 1}


def test_sort_categoricals_6014() -> None:
    """Compare sorting of a plain categorical and a lexically-ordered one.

    Both frames are built from the same strings under one string cache.
    """
    keys = ["bbb", "aaa", "ccc"]

    with pl.StringCache():
        # Categorical with no explicit ordering set.
        plain_df = pl.DataFrame({"key": keys}).with_columns(
            pl.col("key").cast(pl.Categorical)
        )
        # Categorical with lexical ordering requested.
        lexical_df = pl.DataFrame({"key": keys}).with_columns(
            pl.col("key").cast(pl.Categorical).cat.set_ordering("lexical")
        )

    # Without lexical ordering, the sorted output equals the input order here.
    assert plain_df.sort("key").to_dict(False) == {"key": ["bbb", "aaa", "ccc"]}
    # With lexical ordering, the output is in string order.
    assert lexical_df.sort("key").to_dict(False) == {"key": ["aaa", "bbb", "ccc"]}


def test_cast_inner_categorical() -> None:
dtype = pl.List(pl.Categorical)
out = pl.Series("foo", [["a"], ["a", "b"]]).cast(dtype)
Expand Down
Loading

0 comments on commit cec13c3

Please sign in to comment.