From b790f390c4c513cc215282e7f1dce9426e455e0b Mon Sep 17 00:00:00 2001 From: elijahbenizzy Date: Mon, 3 Apr 2023 08:46:51 -0700 Subject: [PATCH 1/3] Adds support for #129 We will likely need to rejigger this over time, but this is a quick way to get support working. Note that we don't actually check the dictionary type, but we're close enough. --- hamilton/base.py | 8 +++++++- tests/test_base.py | 27 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/hamilton/base.py b/hamilton/base.py index 5debad766..a4a8b76a6 100644 --- a/hamilton/base.py +++ b/hamilton/base.py @@ -423,7 +423,13 @@ class SimplePythonDataFrameGraphAdapter(HamiltonGraphAdapter, PandasDataFrameRes def check_input_type(node_type: Type, input_value: Any) -> bool: if node_type == Any: return True - elif inspect.isclass(node_type) and isinstance(input_value, node_type): + # In the case of dict[str, Any] (or equivalent) in python 3.9 + + # we need to double-check that it's not generic, as the isinstance clause will break this + elif ( + inspect.isclass(node_type) + and not typing_inspect.is_generic_type(node_type) + and isinstance(input_value, node_type) + ): return True elif typing_inspect.is_typevar(node_type): # skip runtime comparison for now. 
return True diff --git a/tests/test_base.py b/tests/test_base.py index d8b6a79c2..1401a8581 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -1,4 +1,5 @@ import collections +import sys import typing import numpy as np @@ -85,6 +86,32 @@ def test_SimplePythonDataFrameGraphAdapter_check_input_type_match(node_type, inp assert actual is True +# We cannot parameterize this as the parameterization cannot be +# included if the interpreter predates 3.9, where subscripting dict[...] is not supported. +@pytest.mark.skipif( + sys.version_info < (3, 9, 0), + reason="Type hinting generics in standard collections " "is only supported in 3.9+", +) +def test_SimplePythonDataFrameGraphAdapter_subscripted_generics_dict_str_Any(): + """Tests check_input_type of SimplePythonDataFrameGraphAdapter""" + adapter = base.SimplePythonDataFrameGraphAdapter() + actual = adapter.check_input_type(dict[str, typing.Any], {}) + assert actual is True + + +# We cannot parameterize this as the parameterization cannot be +# included if the interpreter predates 3.9, where subscripting list[...] is not supported. +@pytest.mark.skipif( + sys.version_info < (3, 9, 0), + reason="Type hinting generics in standard collections " "is only supported in 3.9+", +) +def test_SimplePythonDataFrameGraphAdapter_subscripted_generics_list_Any(): + """Tests check_input_type of SimplePythonDataFrameGraphAdapter""" + adapter = base.SimplePythonDataFrameGraphAdapter() + actual = adapter.check_input_type(list[typing.Any], []) + assert actual is True + + @pytest.mark.parametrize( "node_type,input_value", [ From c58fc902f711de82fd09ab4224708f3ff54adee5 Mon Sep 17 00:00:00 2001 From: elijahbenizzy Date: Mon, 3 Apr 2023 09:07:15 -0700 Subject: [PATCH 2/3] Fixes for pandas 2.0 1. Index type in unit tests was incorrect 2. .mean() now does not filter non-numeric columns 3. 
pyspark appears not to be compatible with pandas 2.0 --- setup.py | 3 ++- tests/test_base.py | 28 ++++++++++++++++++++++++++-- tests/test_end_to_end.py | 8 ++++---- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 59ceed67e..01c8284f5 100644 --- a/setup.py +++ b/setup.py @@ -74,7 +74,8 @@ def load_requirements(): "dask-diagnostics": ["dask[diagnostics]"], "dask-distributed": ["dask[distributed]"], "ray": ["ray>=2.0.0", "pyarrow"], - "pyspark": ["pyspark[pandas_on_spark]"], + "pyspark": ["pyspark[pandas_on_spark]", "pandas<2.0"], # I'm sure they'll add support soon, + # but for now it's not compatible "pandera": ["pandera"], }, # Relevant project URLs diff --git a/tests/test_base.py b/tests/test_base.py index 1401a8581..320038a51 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -372,6 +372,12 @@ def test_PandasDataFrameResult_build_dataframe_with_dataframes(outputs, expected pd.testing.assert_frame_equal(actual, expected_result) +# we have to support this in the tests as pandas 2.0 doesn't support python 3.7 and we still do +int_64_index = "Index:::int64" if pd.__version__ >= "2.0.0" else "RangeIndex:::int64" + +PD_VERSION = tuple(int(item) for item in pd.__version__.split(".")) + + @pytest.mark.parametrize( "outputs,expected_result", [ @@ -412,7 +418,24 @@ def test_PandasDataFrameResult_build_dataframe_with_dataframes(outputs, expected ), ), ({"a": pd.DataFrame({"a": [1, 2, 3]})}, ({"RangeIndex:::int64": ["a"]}, {}, {})), - ({"a": pd.Series([1, 2, 3]).index}, ({"Int64Index:::int64": ["a"]}, {}, {})), + pytest.param( + {"a": pd.Series([1, 2, 3]).index}, + ({"Index:::int64": ["a"]}, {}, {}), + marks=pytest.mark.skipif( + PD_VERSION < (2, 0, 0), + reason="Pandas 2.0 changed default indices but we have to " + "support old pandas in unit tests due to python 3.7 support", + ), + ), + pytest.param( + {"a": pd.Series([1, 2, 3]).index}, + ({"Int64Index:::int64": ["a"]}, {}, {}), + marks=pytest.mark.skipif( + PD_VERSION >= 
(2, 0, 0), + reason="Pandas 2.0 changed default indices but we have to " + "support old pandas in unit tests due to python 3.7 support", + ), + ), ], ids=[ "int-index", @@ -421,7 +444,8 @@ def test_PandasDataFrameResult_build_dataframe_with_dataframes(outputs, expected "no-index", "multiple-different-indexes", "df-index", - "index-object", + "index-object-3-7", + "index-object-3-8-plus", ], ) def test_PandasDataFrameResult_pandas_index_types(outputs, expected_result): diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py index e740416ec..6fd2ca2fc 100644 --- a/tests/test_end_to_end.py +++ b/tests/test_end_to_end.py @@ -90,10 +90,10 @@ def test_smoke_screen_module(future_import_annotations, monkeypatch): final_vars=output_columns, ) epsilon = 0.00001 - assert abs(df.mean()["raw_acquisition_cost"] - 0.393808) < epsilon - assert abs(df.mean()["pessimistic_net_acquisition_cost"] - 0.420769) < epsilon - assert abs(df.mean()["neutral_net_acquisition_cost"] - 0.405582) < epsilon - assert abs(df.mean()["optimistic_net_acquisition_cost"] - 0.399363) < epsilon + assert abs(df["raw_acquisition_cost"].mean() - 0.393808) < epsilon + assert abs(df["pessimistic_net_acquisition_cost"].mean() - 0.420769) < epsilon + assert abs(df["neutral_net_acquisition_cost"].mean() - 0.405582) < epsilon + assert abs(df["optimistic_net_acquisition_cost"].mean() - 0.399363) < epsilon assert df["series_with_start_date_end_date"].iloc[0] == "date_20200101_date_20220801" From 306fce7437f44aa72ab7cfbeb6aaa0bff0896177 Mon Sep 17 00:00:00 2001 From: elijahbenizzy Date: Mon, 3 Apr 2023 09:44:08 -0700 Subject: [PATCH 3/3] Bumps version from 1.21.0 to 1.21.1 --- hamilton/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hamilton/version.py b/hamilton/version.py index a01020b55..40a4cd05d 100644 --- a/hamilton/version.py +++ b/hamilton/version.py @@ -1 +1 @@ -VERSION = (1, 21, 0) +VERSION = (1, 21, 1)