From e0de3348b1958905e870009151beb5ba43e5a8b4 Mon Sep 17 00:00:00 2001 From: elijahbenizzy Date: Thu, 2 Mar 2023 14:11:38 -0800 Subject: [PATCH 1/2] Adds support for from __future__ import annotations Luckily the code we have is fairly clean -- this doesn't have a big blast radius. In reality, it should be one spot we make the changes, but we'll have to refactor this later. With __future__ import annotations, python now allows you to lazily evaluate annotations. In <3.10, there is no way to get an inspect.signature with fully evaluated annotations, so we have to use typing.get_type_hints. In 3.10, there's a parameter to force-evaluate signatures, so we can switch to use that when we deprecate 3.9 in quite a while. This is tested in the smoke-screen module -- testing is tricky due to complexity of "future statements". We have to hack up imports, so I chose only to do that in one place. --- hamilton/function_modifiers/expanders.py | 5 +-- hamilton/function_modifiers/macros.py | 5 +-- hamilton/graph.py | 10 ++++-- hamilton/node.py | 10 +++--- tests/test_end_to_end.py | 41 ++++++++++++++++++++++-- 5 files changed, 58 insertions(+), 13 deletions(-) diff --git a/hamilton/function_modifiers/expanders.py b/hamilton/function_modifiers/expanders.py index 5b6b4a59b..bcf0bde84 100644 --- a/hamilton/function_modifiers/expanders.py +++ b/hamilton/function_modifiers/expanders.py @@ -1,6 +1,7 @@ import dataclasses import functools import inspect +import typing from typing import Any, Callable, Collection, Dict, Tuple, Union import typing_inspect @@ -423,7 +424,7 @@ def validate_return_type(fn: Callable): """Validates that the return type of the function is a pandas dataframe. 
:param fn: Function to validate """ - output_type = inspect.signature(fn).return_annotation + output_type = typing.get_type_hints(fn).get("return") try: registry.get_column_type_from_df_type(output_type) except NotImplementedError: @@ -540,7 +541,7 @@ def validate(self, fn: Callable): :param fn: Function to validate. :raises: InvalidDecoratorException If the function is not annotated with a dict or typing.Dict type as output. """ - output_type = inspect.signature(fn).return_annotation + output_type = typing.get_type_hints(fn).get("return") if typing_inspect.is_generic_type(output_type): base_type = typing_inspect.get_origin(output_type) if ( diff --git a/hamilton/function_modifiers/macros.py b/hamilton/function_modifiers/macros.py index ba577e0d2..1f1f2dd1f 100644 --- a/hamilton/function_modifiers/macros.py +++ b/hamilton/function_modifiers/macros.py @@ -1,4 +1,5 @@ import inspect +import typing from typing import Any, Callable, Dict, List, Type, Union import pandas as pd @@ -229,7 +230,7 @@ def validate(self, fn: Callable): ensure_function_empty(fn) # it has to look exactly signature = inspect.signature(fn) - if not issubclass(signature.return_annotation, pd.Series): + if not issubclass(typing.get_type_hints(fn).get("return"), pd.Series): raise base.InvalidDecoratorException( "Models must declare their return type as a pandas Series" ) @@ -250,7 +251,7 @@ def generate_nodes(self, fn: Callable, config: Dict[str, Any] = None) -> List[no return [ node.Node( name=fn_name, - typ=inspect.signature(fn).return_annotation, + typ=typing.get_type_hints(fn).get("return"), doc_string=fn.__doc__, callabl=transform.compute, input_types={dep: pd.Series for dep in transform.get_dependents()}, diff --git a/hamilton/graph.py b/hamilton/graph.py index d2bbca755..76b2f460c 100644 --- a/hamilton/graph.py +++ b/hamilton/graph.py @@ -54,7 +54,10 @@ def add_dependency( def create_function_graph( - *modules: ModuleType, config: Dict[str, Any], adapter: base.HamiltonGraphAdapter + *modules: 
ModuleType, + config: Dict[str, Any], + adapter: base.HamiltonGraphAdapter, + fg: Optional["FunctionGraph"] = None, ) -> Dict[str, node.Node]: """Creates a graph of all available functions & their dependencies. :param modules: A set of modules over which one wants to compute the function graph @@ -63,7 +66,10 @@ def create_function_graph( :return: list of nodes in the graph. If it needs to be more complicated, we'll return an actual networkx graph and get all the rest of the logic for free """ - nodes = {} # name -> Node + if fg is None: + nodes = {} # name -> Node + else: + nodes = fg.nodes functions = sum([find_functions(module) for module in modules], []) # create nodes -- easier to just create this in one loop diff --git a/hamilton/node.py b/hamilton/node.py index 505c51020..e539412c8 100644 --- a/hamilton/node.py +++ b/hamilton/node.py @@ -1,4 +1,5 @@ import inspect +import typing from enum import Enum from typing import Any, Callable, Dict, List, Tuple, Type, Union @@ -82,14 +83,15 @@ def __init__( for key, value in input_types.items() } else: + input_types = typing.get_type_hints(callabl) signature = inspect.signature(callabl) for key, value in signature.parameters.items(): - if value.annotation == inspect._empty: + if key not in input_types: raise ValueError( f"Missing type hint for {key} in function {name}. Please add one to fix." 
) self._input_types[key] = ( - value.annotation, + input_types[key], DependencyType.from_parameter(value), ) elif self.user_defined: @@ -188,11 +190,11 @@ def from_fn(fn: Callable, name: str = None) -> "Node": """ if name is None: name = fn.__name__ - sig = inspect.signature(fn) + return_type = typing.get_type_hints(fn)["return"] module = inspect.getmodule(fn).__name__ return Node( name, - sig.return_annotation, + return_type, fn.__doc__ if fn.__doc__ else "", callabl=fn, tags={"module": module}, diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py index bcdc8cd2f..39bf36011 100644 --- a/tests/test_end_to_end.py +++ b/tests/test_end_to_end.py @@ -1,8 +1,12 @@ +import importlib +import sys + import pytest import hamilton.driver import tests.resources.data_quality -import tests.resources.smoke_screen_module + +# import tests.resources.smoke_screen_module from hamilton.data_quality.base import DataValidationError, ValidationResult @@ -32,9 +36,40 @@ def test_data_quality_workflow_fails(): ) -def test_smoke_screen_module(): +# Adapted from https://stackoverflow.com/questions/41858147/how-to-modify-imported-source-code-on-the-fly +# This is needed to decide whether to import annotations... 
+def modify_and_import(module_name, package, modification_func): + spec = importlib.util.find_spec(module_name, package) + source = spec.loader.get_source(module_name) + new_source = modification_func(source) + module = importlib.util.module_from_spec(spec) + codeobj = compile(new_source, module.__spec__.origin, "exec") + exec(codeobj, module.__dict__) + sys.modules[module_name] = module + return module + + +@pytest.mark.parametrize( + "future_import_annotations", + [ + True, + False, + ], +) +def test_smoke_screen_module(future_import_annotations, monkeypatch): + # Monkeypatch the env + # This tells the smoke screen module whether to use the future import + modification_func = ( + lambda source: "\n".join(["from __future__ import annotations"] + source.splitlines()) + if future_import_annotations + else source + ) + # module = importlib.reload(tests.resources.smoke_screen_module) + module = modify_and_import( + "tests.resources.smoke_screen_module", tests.resources, modification_func + ) config = {"region": "US"} - dr = hamilton.driver.Driver(config, tests.resources.smoke_screen_module) + dr = hamilton.driver.Driver(config, module) output_columns = [ "raw_acquisition_cost", "pessimistic_net_acquisition_cost", From 9a3991fc3529dc9b5343878ed70f71ff0ec17ce5 Mon Sep 17 00:00:00 2001 From: elijahbenizzy Date: Sat, 4 Mar 2023 12:28:26 -0800 Subject: [PATCH 2/2] Adds caveats section to hamilton documentation Currently there's just a note about postponed evaluation of annotations, but we can add more here over time. This is meant to be specific API reference stuff that doesn't really fit elsewhere. 
--- docs/reference/api-reference/caveats.md | 9 +++++++++ docs/reference/api-reference/index.rst | 3 +++ 2 files changed, 12 insertions(+) create mode 100644 docs/reference/api-reference/caveats.md diff --git a/docs/reference/api-reference/caveats.md b/docs/reference/api-reference/caveats.md new file mode 100644 index 000000000..120e15746 --- /dev/null +++ b/docs/reference/api-reference/caveats.md @@ -0,0 +1,9 @@ +# Caveats + +## Delayed evaluation of annotation + +Hamilton works with [PEP-563](https://peps.python.org/pep-0563/), postponed evaluation of annotations. +That said, it *does* force evaluation of type-hints when building the function graph. So, if you're using +particularly complex/slow to load python types and expecting delay, know that they will have to be evaluated +when the driver is instantiated and modules are passed, so Hamilton can inspect the types and build the +function graph. diff --git a/docs/reference/api-reference/index.rst b/docs/reference/api-reference/index.rst index 9f910c5ec..c9421f1a4 100644 --- a/docs/reference/api-reference/index.rst +++ b/docs/reference/api-reference/index.rst @@ -10,6 +10,7 @@ API Reference graph-adapters drivers disabling-telemetry + caveats Here lies documentation for the public API you can build on top of. @@ -23,3 +24,5 @@ See :doc:`graph-adapters` for ways to execute Hamilton. See :doc:`drivers` for how to call and instantiate a Hamilton dataflow. Right now there are only two! See :doc:`disabling-telemetry` for how to disable telemetry. + +See :doc:`caveats` for implementation details/design decisions that may impact your use of Hamilton.