Supports delayed evaluation of annotations #92

Merged (2 commits) on Mar 4, 2023
9 changes: 9 additions & 0 deletions docs/reference/api-reference/caveats.md
@@ -0,0 +1,9 @@
# Caveats

## Delayed evaluation of annotations

Hamilton works with [PEP 563](https://peps.python.org/pep-0563/), postponed evaluation of annotations.
That said, it *does* force evaluation of type hints when building the function graph. So if you're using
particularly complex or slow-to-load Python types and expect evaluation to stay deferred, know that they
will be evaluated when the driver is instantiated and modules are passed, so that Hamilton can inspect
the types and build the function graph.
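
To make the caveat concrete, here is a minimal sketch (the module and column names are invented for illustration; the driver call is the standard `hamilton.driver.Driver(config, module)` entry point used in this PR's tests). The dataflow module opts into postponed evaluation, but its string annotations are still resolved the moment the driver builds the graph.

```python
# my_functions.py -- hypothetical dataflow module using postponed evaluation
from __future__ import annotations  # PEP 563: annotations are stored as strings

import pandas as pd


def spend_per_signup(spend: pd.Series, signups: pd.Series) -> pd.Series:
    """Marketing spend divided by signups."""
    return spend / signups
```

```python
# Instantiating the driver forces the string annotations above to be evaluated,
# since Hamilton needs the real types to construct and validate the graph.
from hamilton import driver

import my_functions

dr = driver.Driver({}, my_functions)  # type hints are resolved here, not lazily
```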
3 changes: 3 additions & 0 deletions docs/reference/api-reference/index.rst
@@ -10,6 +10,7 @@ API Reference
graph-adapters
drivers
disabling-telemetry
caveats

Here lies documentation for the public API you can build on top of.

@@ -23,3 +24,5 @@ See :doc:`graph-adapters` for ways to execute Hamilton.
See :doc:`drivers` for how to call and instantiate a Hamilton dataflow. Right now there are only two!

See :doc:`disabling-telemetry` for how to disable telemetry.

See :doc:`caveats` for implementation details/design decisions that may impact your use of Hamilton.
5 changes: 3 additions & 2 deletions hamilton/function_modifiers/expanders.py
@@ -1,6 +1,7 @@
import dataclasses
import functools
import inspect
import typing
from typing import Any, Callable, Collection, Dict, Tuple, Union

import typing_inspect
@@ -423,7 +424,7 @@ def validate_return_type(fn: Callable):
"""Validates that the return type of the function is a pandas dataframe.
:param fn: Function to validate
"""
output_type = inspect.signature(fn).return_annotation
output_type = typing.get_type_hints(fn).get("return")
try:
registry.get_column_type_from_df_type(output_type)
except NotImplementedError:
@@ -540,7 +541,7 @@ def validate(self, fn: Callable):
:param fn: Function to validate.
:raises: InvalidDecoratorException If the function is not annotated with a dict or typing.Dict type as output.
"""
output_type = inspect.signature(fn).return_annotation
output_type = typing.get_type_hints(fn).get("return")
if typing_inspect.is_generic_type(output_type):
base_type = typing_inspect.get_origin(output_type)
if (
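
The motivation for swapping `inspect.signature(fn).return_annotation` for `typing.get_type_hints(fn).get("return")` is that, under PEP 563, the raw annotation is just a string and cannot be passed to the registry lookup. A small standalone sketch of the difference (the function is illustrative, not from the codebase):

```python
from __future__ import annotations  # makes every annotation below a string

import inspect
import typing

import pandas as pd


def revenue(sales: pd.Series) -> pd.Series:
    return sales * 1.1


# The raw annotation is the literal string "pd.Series"...
print(inspect.signature(revenue).return_annotation)  # 'pd.Series'

# ...while get_type_hints() evaluates it in the defining module's namespace.
print(typing.get_type_hints(revenue)["return"])  # <class 'pandas.core.series.Series'>
```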
5 changes: 3 additions & 2 deletions hamilton/function_modifiers/macros.py
@@ -1,4 +1,5 @@
import inspect
import typing
from typing import Any, Callable, Dict, List, Type, Union

import pandas as pd
@@ -229,7 +230,7 @@ def validate(self, fn: Callable):

ensure_function_empty(fn) # it has to look exactly
signature = inspect.signature(fn)
if not issubclass(signature.return_annotation, pd.Series):
if not issubclass(typing.get_type_hints(fn).get("return"), pd.Series):
raise base.InvalidDecoratorException(
"Models must declare their return type as a pandas Series"
)
@@ -250,7 +251,7 @@ def generate_nodes(self, fn: Callable, config: Dict[str, Any] = None) -> List[no
return [
node.Node(
name=fn_name,
typ=inspect.signature(fn).return_annotation,
typ=typing.get_type_hints(fn).get("return"),
doc_string=fn.__doc__,
callabl=transform.compute,
input_types={dep: pd.Series for dep in transform.get_dependents()},
10 changes: 8 additions & 2 deletions hamilton/graph.py
@@ -54,7 +54,10 @@ def add_dependency(


def create_function_graph(
*modules: ModuleType, config: Dict[str, Any], adapter: base.HamiltonGraphAdapter
*modules: ModuleType,
config: Dict[str, Any],
adapter: base.HamiltonGraphAdapter,
fg: Optional["FunctionGraph"] = None,
) -> Dict[str, node.Node]:
"""Creates a graph of all available functions & their dependencies.
:param modules: A set of modules over which one wants to compute the function graph
@@ -63,7 +66,10 @@
:return: list of nodes in the graph.
If it needs to be more complicated, we'll return an actual networkx graph and get all the rest of the logic for free
"""
nodes = {} # name -> Node
if fg is None:
nodes = {} # name -> Node
else:
nodes = fg.nodes
functions = sum([find_functions(module) for module in modules], [])

# create nodes -- easier to just create this in one loop
10 changes: 6 additions & 4 deletions hamilton/node.py
@@ -1,4 +1,5 @@
import inspect
import typing
from enum import Enum
from typing import Any, Callable, Dict, List, Tuple, Type, Union

@@ -82,14 +83,15 @@ def __init__(
for key, value in input_types.items()
}
else:
input_types = typing.get_type_hints(callabl)
signature = inspect.signature(callabl)
for key, value in signature.parameters.items():
if value.annotation == inspect._empty:
if key not in input_types:
raise ValueError(
f"Missing type hint for {key} in function {name}. Please add one to fix."
)
self._input_types[key] = (
value.annotation,
input_types[key],
DependencyType.from_parameter(value),
)
elif self.user_defined:
@@ -188,11 +190,11 @@ def from_fn(fn: Callable, name: str = None) -> "Node":
"""
if name is None:
name = fn.__name__
sig = inspect.signature(fn)
return_type = typing.get_type_hints(fn)["return"]
module = inspect.getmodule(fn).__name__
return Node(
name,
sig.return_annotation,
return_type,
fn.__doc__ if fn.__doc__ else "",
callabl=fn,
tags={"module": module},
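
One subtlety worth noting about the new parameter handling in `node.py`: `typing.get_type_hints()` silently omits parameters that have no annotation, so the missing-hint check becomes a membership test (`key not in input_types`) rather than a comparison against `inspect._empty`. A quick illustration with a toy function (not from the codebase):

```python
import typing


def partially_annotated(a: int, b):  # 'b' has no type hint
    return a


hints = typing.get_type_hints(partially_annotated)
print(hints)          # {'a': <class 'int'>}
print("b" in hints)   # False -- this is what the `key not in input_types` check catches
```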
41 changes: 38 additions & 3 deletions tests/test_end_to_end.py
@@ -1,8 +1,12 @@
import importlib
import sys

import pytest

import hamilton.driver
import tests.resources.data_quality
import tests.resources.smoke_screen_module

# import tests.resources.smoke_screen_module
from hamilton.data_quality.base import DataValidationError, ValidationResult


@@ -32,9 +36,40 @@ def test_data_quality_workflow_fails():
)


def test_smoke_screen_module():
# Adapted from https://stackoverflow.com/questions/41858147/how-to-modify-imported-source-code-on-the-fly
# This is needed to decide whether to import annotations...
def modify_and_import(module_name, package, modification_func):
spec = importlib.util.find_spec(module_name, package)
source = spec.loader.get_source(module_name)
new_source = modification_func(source)
module = importlib.util.module_from_spec(spec)
codeobj = compile(new_source, module.__spec__.origin, "exec")
exec(codeobj, module.__dict__)
sys.modules[module_name] = module
return module


@pytest.mark.parametrize(
"future_import_annotations",
[
True,
False,
],
)
def test_smoke_screen_module(future_import_annotations, monkeypatch):
# Monkeypatch the env
# This tells the smoke screen module whether to use the future import
modification_func = (
lambda source: "\n".join(["from __future__ import annotations"] + source.splitlines())
if future_import_annotations
else source
)
# module = importlib.reload(tests.resources.smoke_screen_module)
module = modify_and_import(
"tests.resources.smoke_screen_module", tests.resources, modification_func
)
config = {"region": "US"}
dr = hamilton.driver.Driver(config, tests.resources.smoke_screen_module)
dr = hamilton.driver.Driver(config, module)
output_columns = [
"raw_acquisition_cost",
"pessimistic_net_acquisition_cost",
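
For readers unfamiliar with the source-rewriting trick in the test above: `modification_func` simply prepends the future import to the module's source before it is recompiled, so the same resource module is exercised both with and without postponed annotations. A small self-contained check of the mechanism (the function below is invented for the example, not part of the test resources):

```python
import inspect

source = (
    "import pandas as pd\n"
    "\n"
    "def raw_cost(spend: pd.Series) -> pd.Series:\n"
    "    return spend\n"
)

# Same rewrite the test applies when future_import_annotations is True.
modified = "from __future__ import annotations\n" + source

namespace = {}
exec(compile(modified, "<demo>", "exec"), namespace)

# With the future import, the stored annotation is the literal string "pd.Series",
# which is exactly the case typing.get_type_hints() in the graph-building code now handles.
print(inspect.signature(namespace["raw_cost"]).return_annotation)  # 'pd.Series'
```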