Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

@ignore #1172

Merged
merged 3 commits into from
Oct 12, 2024
Merged

@ignore #1172

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/reference/decorators/config_when.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ Note the following:

* ``@config`` If you're feeling adventurous, you can pass in a lambda function that takes in the entire configuration and resolves to ``True`` or ``False``. You probably don't want to do this.

* To always exclude a function (such as a helper function) from the DAG, the most straightforward and preferred pattern is to prefix its name with "_", but you can also use ``@hamilton_exclude``.

----

**Reference Documentation**

.. autoclass:: hamilton.function_modifiers.config
   :members: when, when_in, when_not, when_not_in
   :special-members: __init__

.. autoclass:: hamilton.function_modifiers.configuration.hamilton_exclude

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def data_3() -> pd.DataFrame:

# data1 and data2
@mutate(data_1, data_2)
def _filter(some_data: pd.DataFrame) -> pd.DataFrame:
def filter_(some_data: pd.DataFrame) -> pd.DataFrame:
"""Remove NAN values.

Decorated with mutate this will be applied to both data_1 and data_2.
Expand All @@ -35,7 +35,7 @@ def _filter(some_data: pd.DataFrame) -> pd.DataFrame:
# data 2
# this is for value
@mutate(data_2, missing_row=value(["c", 145]))
def _add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
def add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
"""Add row to dataframe.

The functions decorated with mutate can be viewed as steps in pipe_output in the order they
Expand All @@ -49,7 +49,7 @@ def _add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.Da
# data 2
# this is for source
@mutate(data_2, other_data=source("data_3"))
def _join(some_data: pd.DataFrame, other_data: pd.DataFrame) -> pd.DataFrame:
def join(some_data: pd.DataFrame, other_data: pd.DataFrame) -> pd.DataFrame:
"""Join two dataframes.

We can use results from other nodes in the DAG by using the `source` functionality. Here we join
Expand All @@ -60,7 +60,7 @@ def _join(some_data: pd.DataFrame, other_data: pd.DataFrame) -> pd.DataFrame:

# data1 and data2
@mutate(data_1, data_2)
def _sort(some_data: pd.DataFrame) -> pd.DataFrame:
def sort(some_data: pd.DataFrame) -> pd.DataFrame:
"""Sort dataframes by first column.

This is the last step of our pipeline(s) and gets again applied to data_1 and data_2. We did some
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def feat_D(field_2: pd.Series, col_2: pd.Series) -> pd.DataFrame:

# data1 and data2
@mutate(apply_to(data_1).when_in(a=[1, 2, 3]), apply_to(data_2).when_not_in(a=[1, 2, 3]))
def _filter(some_data: pd.DataFrame) -> pd.DataFrame:
def filter_(some_data: pd.DataFrame) -> pd.DataFrame:
"""Remove NAN values.

Mutate accepts a `config.*` family conditional where we can choose when the transform will be applied
Expand All @@ -66,7 +66,7 @@ def _filter(some_data: pd.DataFrame) -> pd.DataFrame:
# data 2
# this is for value
@mutate(apply_to(data_2), missing_row=value(["c", 145]))
def _add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
def add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
"""Add row to dataframe.

The functions decorated with mutate can be viewed as steps in pipe_output in the order they
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,56 @@

import pandas as pd

from hamilton.function_modifiers import pipe_output, source, step, value
from hamilton.function_modifiers import hamilton_exclude, pipe_output, source, step, value


# data1 and data2
def _filter(some_data: pd.DataFrame) -> pd.DataFrame:
@hamilton_exclude
def filter_(some_data: pd.DataFrame) -> pd.DataFrame:
return some_data.dropna()


@hamilton_exclude
def test_foo(a, b, c):
    """Add the three arguments together, left to right."""
    partial_sum = a + b
    return partial_sum + c


# data 2
# this is for value
def _add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
@hamilton_exclude
def add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
some_data.loc[-1] = missing_row
return some_data


# data 2
# this is for source
def _join(some_data: pd.DataFrame, other_data: pd.DataFrame) -> pd.DataFrame:
@hamilton_exclude
def join(some_data: pd.DataFrame, other_data: pd.DataFrame) -> pd.DataFrame:
return some_data.set_index("col_2").join(other_data.set_index("col_1"))


# data1 and data2
def _sort(some_data: pd.DataFrame) -> pd.DataFrame:
@hamilton_exclude
def sort(some_data: pd.DataFrame) -> pd.DataFrame:
columns = some_data.columns
return some_data.sort_values(by=columns[0])


@pipe_output(
step(_filter),
step(_sort),
step(filter_),
step(sort),
)
def data_1() -> pd.DataFrame:
df = pd.DataFrame.from_dict({"col_1": [3, 2, pd.NA, 0], "col_2": ["a", "b", pd.NA, "d"]})
return df


@pipe_output(
step(_filter),
step(_add_missing_value, missing_row=value(["c", 145])),
step(_join, other_data=source("data_3")),
step(_sort),
step(filter_),
step(add_missing_value, missing_row=value(["c", 145])),
step(join, other_data=source("data_3")),
step(sort),
)
def data_2() -> pd.DataFrame:
df = pd.DataFrame.from_dict(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from typing import Dict

from hamilton.function_modifiers import (
extract_fields,
hamilton_exclude,
pipe_output,
step,
)


@hamilton_exclude
def pre_step(something: int) -> int:
    """Add 10 to the input; used below as the ``transform_1`` step on ``field_1``."""
    shifted = something + 10
    return shifted


@hamilton_exclude
def post_step(something: int) -> int:
    """Add 100 to the input; used below as the ``transform_2`` step."""
    shifted = something + 100
    return shifted


@hamilton_exclude
def something_else(something: int) -> int:
    """Add 1000 to the input; used below as a step without an ``on_output`` target."""
    shifted = something + 1000
    return shifted


def a() -> int:
    """Return the constant 10 (``foo`` declares a parameter with this name)."""
    constant = 10
    return constant


@pipe_output(
    # No ``on_output`` target: gets applied to all sink nodes.
    step(something_else),
    # Only applied to field_1.
    step(pre_step).named(name="transform_1").on_output("field_1"),
    # Applied to field_1 and field_3.
    step(post_step).named(name="transform_2").on_output(["field_1", "field_3"]),
)
@extract_fields({"field_1": int, "field_2": int, "field_3": int})
def foo(a: int) -> Dict[str, int]:
    """Return three fixed integer fields; the argument ``a`` is not used in the body."""
    fields = {"field_1": 1, "field_2": 2, "field_3": 3}
    return fields
1 change: 1 addition & 0 deletions hamilton/function_modifiers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

# The config decorator
config = configuration.config
# Exported as an instance rather than the class, so it is applied as a bare
# ``@hamilton_exclude`` (no parentheses).
hamilton_exclude = configuration.hamilton_exclude()

# Dependency Specification
# Helper functions to specify dependency sources for parameterization
Expand Down
35 changes: 35 additions & 0 deletions hamilton/function_modifiers/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,38 @@ def LEAD_LOG_BASS_MODEL_TIMES_TREND(

resolver = ConfigResolver.when_not_in(**key_value_group_pairs)
return config(resolver, config_used=list(resolver.optional_config))


class hamilton_exclude(base.NodeResolver):
    """Decorator that keeps a function out of the Hamilton DAG.

    Prefixing a function name with "_" remains the preferred way to hide it from the DAG. This
    decorator covers the exceptional case where a helper function should not carry the "_"
    prefix but is still used inside other nodes or together with ``step`` or ``apply_to``.

    .. code-block:: python

        @hamilton_exclude
        def helper(...) -> ...:
            '''This will not be part of the DAG'''
            ...

    It can also be used to exclude functions in legacy code that would otherwise raise an
    error in Hamilton (for example, because of missing type hints).
    """

    def __init__(self):
        """Take no arguments; the decorator has nothing to configure."""

    def resolve(self, *args, **kwargs) -> Optional[Callable]:
        """Resolve the decorated function to nothing so that it is left out of the DAG.

        All positional and keyword arguments are accepted and ignored.

        :return: Always ``None``, meaning the function is not included in the DAG
        """
        return None

    def validate(self, fn):
        """Accept any function; nothing is checked.

        :param fn: Function being decorated (not inspected)
        """
9 changes: 3 additions & 6 deletions hamilton/function_modifiers/macros.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from hamilton import models, node
from hamilton.dev_utils.deprecation import deprecated
from hamilton.function_modifiers import base
from hamilton.function_modifiers.configuration import ConfigResolver
from hamilton.function_modifiers.configuration import ConfigResolver, hamilton_exclude
from hamilton.function_modifiers.delayed import resolve as delayed_resolve
from hamilton.function_modifiers.dependencies import (
LiteralDependency,
Expand Down Expand Up @@ -1367,11 +1367,8 @@ def __call__(self, mutating_fn: Callable):
:return: mutating_fn, to guarantee function works even when Hamilton driver is not used
"""

# TODO: We want to hide such helper function from the DAG by default, since we are manually
# adding them to the DAG in a different place
# Suggestion: ignore decorator - https://github.com/DAGWorks-Inc/hamilton/issues/1168
# if not mutating_fn.__name__.startswith("_"):
# mutating_fn.__name__ = "".join(("_", mutating_fn.__name__))
# This function will be excluded from the DAG as a node since we are inserting it manually
mutating_fn = hamilton_exclude()(mutating_fn)

if self.restrict_to_single_module:
self.validate_same_module(mutating_fn=mutating_fn)
Expand Down
11 changes: 11 additions & 0 deletions tests/function_modifiers/test_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,14 @@ def config_fn() -> int:
config_fn = annotation(config_fn)
nodes = base.resolve_nodes(config_fn, {})
assert len(nodes) == 0


def test_hamilton_exclude():
    """A function wrapped with ``hamilton_exclude`` resolves to ``None`` and yields no nodes."""

    def fn_to_hamilton_exclude() -> int:
        pass

    exclude = function_modifiers.hamilton_exclude
    excluded_fn = exclude(fn_to_hamilton_exclude)
    resolved_nodes = base.resolve_nodes(excluded_fn, {})
    assert exclude.resolve(fn_to_hamilton_exclude, {"key": "value"}) is None
    assert len(resolved_nodes) == 0