DAGWorks-Inc · elijahbenizzy · Apr 3, 2023 · Apr 3, 2023 · Apr 3, 2023 · Apr 3, 2023
diff --git a/hamilton/base.py b/hamilton/base.py
@@ -423,7 +423,13 @@ class SimplePythonDataFrameGraphAdapter(HamiltonGraphAdapter, PandasDataFrameRes
     def check_input_type(node_type: Type, input_value: Any) -> bool:
         if node_type == Any:
             return True
-        elif inspect.isclass(node_type) and isinstance(input_value, node_type):
+        # In the case of dict[str, Any] (or equivalent) in Python 3.9+
+        # we need to double-check that it's not generic, as the isinstance clause will break on it
+        elif (
+            inspect.isclass(node_type)
+            and not typing_inspect.is_generic_type(node_type)
+            and isinstance(input_value, node_type)
+        ):
             return True
         elif typing_inspect.is_typevar(node_type):  # skip runtime comparison for now.
             return True

diff --git a/setup.py b/setup.py
@@ -74,7 +74,8 @@ def load_requirements():
         "dask-diagnostics": ["dask[diagnostics]"],
         "dask-distributed": ["dask[distributed]"],
         "ray": ["ray>=2.0.0", "pyarrow"],
-        "pyspark": ["pyspark[pandas_on_spark]"],
+        "pyspark": ["pyspark[pandas_on_spark]", "pandas<2.0"],  # I'm sure they'll add support soon,
+        # but for now it's not compatible
         "pandera": ["pandera"],
     },
     # Relevant project URLs

diff --git a/tests/test_base.py b/tests/test_base.py
@@ -1,4 +1,5 @@
 import collections
+import sys
 import typing
 
 import numpy as np
@@ -85,6 +86,32 @@ def test_SimplePythonDataFrameGraphAdapter_check_input_type_match(node_type, inp
     assert actual is True
 
 
+# We cannot parameterize this, as the parameter list is evaluated at import time and
+# dict[str, typing.Any] cannot be evaluated on Python < 3.9.
+@pytest.mark.skipif(
+    sys.version_info < (3, 9, 0),
+    reason="Type hinting generics in standard collections " "is only supported in 3.9+",
+)
+def test_SimplePythonDataFrameGraphAdapter_subscripted_generics_dict_str_Any():
+    """Tests check_input_type of SimplePythonDataFrameGraphAdapter"""
+    adapter = base.SimplePythonDataFrameGraphAdapter()
+    actual = adapter.check_input_type(dict[str, typing.Any], {})
+    assert actual is True
+
+
+# We cannot parameterize this, as the parameter list is evaluated at import time and
+# list[typing.Any] cannot be evaluated on Python < 3.9.
+@pytest.mark.skipif(
+    sys.version_info < (3, 9, 0),
+    reason="Type hinting generics in standard collections " "is only supported in 3.9+",
+)
+def test_SimplePythonDataFrameGraphAdapter_subscripted_generics_list_Any():
+    """Tests check_input_type of SimplePythonDataFrameGraphAdapter"""
+    adapter = base.SimplePythonDataFrameGraphAdapter()
+    actual = adapter.check_input_type(list[typing.Any], [])
+    assert actual is True
+
+
 @pytest.mark.parametrize(
     "node_type,input_value",
     [
@@ -345,6 +372,12 @@ def test_PandasDataFrameResult_build_dataframe_with_dataframes(outputs, expected
     pd.testing.assert_frame_equal(actual, expected_result)
 
 
+# we have to support this in the tests as pandas 2.0 doesn't support python 3.7 and we still do
+PD_VERSION = tuple(int(item) for item in pd.__version__.split("."))
+# Compare the parsed tuple, not the raw string: "10.0.0" >= "2.0.0" is False lexicographically.
+int_64_index = "Index:::int64" if PD_VERSION >= (2, 0, 0) else "RangeIndex:::int64"
+
+
 @pytest.mark.parametrize(
     "outputs,expected_result",
     [
@@ -385,7 +418,24 @@ def test_PandasDataFrameResult_build_dataframe_with_dataframes(outputs, expected
             ),
         ),
         ({"a": pd.DataFrame({"a": [1, 2, 3]})}, ({"RangeIndex:::int64": ["a"]}, {}, {})),
-        ({"a": pd.Series([1, 2, 3]).index}, ({"Int64Index:::int64": ["a"]}, {}, {})),
+        pytest.param(
+            {"a": pd.Series([1, 2, 3]).index},
+            ({"Index:::int64": ["a"]}, {}, {}),
+            marks=pytest.mark.skipif(
+                PD_VERSION < (2, 0, 0),
+                reason="Pandas 2.0 changed default indices but we have to "
+                "support old pandas in unit tests due to python 3.7 support",
+            ),
+        ),
+        pytest.param(
+            {"a": pd.Series([1, 2, 3]).index},
+            ({"Int64Index:::int64": ["a"]}, {}, {}),
+            marks=pytest.mark.skipif(
+                PD_VERSION >= (2, 0, 0),
+                reason="Pandas 2.0 changed default indices but we have to "
+                "support old pandas in unit tests due to python 3.7 support",
+            ),
+        ),
     ],
     ids=[
         "int-index",
@@ -394,7 +444,8 @@ def test_PandasDataFrameResult_build_dataframe_with_dataframes(outputs, expected
         "no-index",
         "multiple-different-indexes",
         "df-index",
-        "index-object",
+        "index-object-3-8-plus",
+        "index-object-3-7",
     ],
 )
 def test_PandasDataFrameResult_pandas_index_types(outputs, expected_result):

diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py
@@ -90,10 +90,10 @@ def test_smoke_screen_module(future_import_annotations, monkeypatch):
         final_vars=output_columns,
     )
     epsilon = 0.00001
-    assert abs(df.mean()["raw_acquisition_cost"] - 0.393808) < epsilon
-    assert abs(df.mean()["pessimistic_net_acquisition_cost"] - 0.420769) < epsilon
-    assert abs(df.mean()["neutral_net_acquisition_cost"] - 0.405582) < epsilon
-    assert abs(df.mean()["optimistic_net_acquisition_cost"] - 0.399363) < epsilon
+    assert abs(df["raw_acquisition_cost"].mean() - 0.393808) < epsilon
+    assert abs(df["pessimistic_net_acquisition_cost"].mean() - 0.420769) < epsilon
+    assert abs(df["neutral_net_acquisition_cost"].mean() - 0.405582) < epsilon
+    assert abs(df["optimistic_net_acquisition_cost"].mean() - 0.399363) < epsilon
     assert df["series_with_start_date_end_date"].iloc[0] == "date_20200101_date_20220801"