DAGWorks-Inc · elijahbenizzy · Oct 12, 2024 · Oct 8, 2024 · Oct 8, 2024 · Oct 9, 2024
diff --git a/docs/reference/decorators/config_when.rst b/docs/reference/decorators/config_when.rst
@@ -18,11 +18,13 @@ Note the following:
 
   * ``@config`` If you're feeling adventurous, you can pass in a lambda function that takes in the entire configuration and resolves to ``True`` or ``False``. You probably don't want to do this.
 
+* To always exclude a function (such as a helper function) from the DAG, the most straightforward and preferred pattern is to prefix its name with "_", but you can also use ``@hamilton_exclude``.
 
 ----
 
 **Reference Documentation**
 
 .. autoclass:: hamilton.function_modifiers.config
-   :members: when, when_in, when_not, when_not_in
    :special-members: __init__
+
+.. autoclass:: hamilton.function_modifiers.configuration.hamilton_exclude
diff --git a/examples/mutate/abstract functionality blueprint/pipe_output_on_output.py b/examples/mutate/abstract functionality blueprint/pipe_output_on_output.py
diff --git a/.../abstract functionality blueprint/DAG.png → .../abstract_functionality_blueprint/DAG.png b/.../abstract functionality blueprint/DAG.png → .../abstract_functionality_blueprint/DAG.png
diff --git a/...e/abstract functionality blueprint/README → ...e/abstract_functionality_blueprint/README b/...e/abstract functionality blueprint/README → ...e/abstract_functionality_blueprint/README
diff --git a/...bstract functionality blueprint/mutate.py → ...bstract_functionality_blueprint/mutate.py b/...bstract functionality blueprint/mutate.py → ...bstract_functionality_blueprint/mutate.py
@@ -24,7 +24,7 @@ def data_3() -> pd.DataFrame:
 
 # data1 and data2
 @mutate(data_1, data_2)
-def _filter(some_data: pd.DataFrame) -> pd.DataFrame:
+def filter_(some_data: pd.DataFrame) -> pd.DataFrame:
     """Remove NAN values.
 
     Decorated with mutate this will be applied to both data_1 and data_2.
@@ -35,7 +35,7 @@ def _filter(some_data: pd.DataFrame) -> pd.DataFrame:
 # data 2
 # this is for value
 @mutate(data_2, missing_row=value(["c", 145]))
-def _add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
+def add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
     """Add row to dataframe.
 
     The functions decorated with mutate can be viewed as steps in pipe_output in the order they
@@ -49,7 +49,7 @@ def _add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.Da
 # data 2
 # this is for source
 @mutate(data_2, other_data=source("data_3"))
-def _join(some_data: pd.DataFrame, other_data: pd.DataFrame) -> pd.DataFrame:
+def join(some_data: pd.DataFrame, other_data: pd.DataFrame) -> pd.DataFrame:
     """Join two dataframes.
 
     We can use results from other nodes in the DAG by using the `source` functionality. Here we join
@@ -60,7 +60,7 @@ def _join(some_data: pd.DataFrame, other_data: pd.DataFrame) -> pd.DataFrame:
 
 # data1 and data2
 @mutate(data_1, data_2)
-def _sort(some_data: pd.DataFrame) -> pd.DataFrame:
+def sort(some_data: pd.DataFrame) -> pd.DataFrame:
     """Sort dataframes by first column.
 
     This is the last step of our pipeline(s) and gets again applied to data_1 and data_2. We did some

diff --git a/...nctionality blueprint/mutate_on_output.py → ...nctionality_blueprint/mutate_on_output.py b/...nctionality blueprint/mutate_on_output.py → ...nctionality_blueprint/mutate_on_output.py
@@ -54,7 +54,7 @@ def feat_D(field_2: pd.Series, col_2: pd.Series) -> pd.DataFrame:
 
 # data1 and data2
 @mutate(apply_to(data_1).when_in(a=[1, 2, 3]), apply_to(data_2).when_not_in(a=[1, 2, 3]))
-def _filter(some_data: pd.DataFrame) -> pd.DataFrame:
+def filter_(some_data: pd.DataFrame) -> pd.DataFrame:
     """Remove NAN values.
 
     Mutate accepts a `config.*` family conditional where we can choose when the transform will be applied
@@ -66,7 +66,7 @@ def _filter(some_data: pd.DataFrame) -> pd.DataFrame:
 # data 2
 # this is for value
 @mutate(apply_to(data_2), missing_row=value(["c", 145]))
-def _add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
+def add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
     """Add row to dataframe.
 
     The functions decorated with mutate can be viewed as steps in pipe_output in the order they

diff --git a/...nality blueprint/mutate_twice_the_same.py → ...nality_blueprint/mutate_twice_the_same.py b/...nality blueprint/mutate_twice_the_same.py → ...nality_blueprint/mutate_twice_the_same.py
diff --git a/...ct functionality blueprint/notebook.ipynb → ...ct_functionality_blueprint/notebook.ipynb b/...ct functionality blueprint/notebook.ipynb → ...ct_functionality_blueprint/notebook.ipynb
diff --git a/...ct functionality blueprint/pipe_output.py → ...ct_functionality_blueprint/pipe_output.py b/...ct functionality blueprint/pipe_output.py → ...ct_functionality_blueprint/pipe_output.py
@@ -2,47 +2,56 @@
 
 import pandas as pd
 
-from hamilton.function_modifiers import pipe_output, source, step, value
+from hamilton.function_modifiers import hamilton_exclude, pipe_output, source, step, value
 
 
 # data1 and data2
-def _filter(some_data: pd.DataFrame) -> pd.DataFrame:
+@hamilton_exclude
+def filter_(some_data: pd.DataFrame) -> pd.DataFrame:
     return some_data.dropna()
 
 
+@hamilton_exclude
+def test_foo(a, b, c):
+    return a + b + c
+
+
 # data 2
 # this is for value
-def _add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
+@hamilton_exclude
+def add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
     some_data.loc[-1] = missing_row
     return some_data
 
 
 # data 2
 # this is for source
-def _join(some_data: pd.DataFrame, other_data: pd.DataFrame) -> pd.DataFrame:
+@hamilton_exclude
+def join(some_data: pd.DataFrame, other_data: pd.DataFrame) -> pd.DataFrame:
     return some_data.set_index("col_2").join(other_data.set_index("col_1"))
 
 
 # data1 and data2
-def _sort(some_data: pd.DataFrame) -> pd.DataFrame:
+@hamilton_exclude
+def sort(some_data: pd.DataFrame) -> pd.DataFrame:
     columns = some_data.columns
     return some_data.sort_values(by=columns[0])
 
 
 @pipe_output(
-    step(_filter),
-    step(_sort),
+    step(filter_),
+    step(sort),
 )
 def data_1() -> pd.DataFrame:
     df = pd.DataFrame.from_dict({"col_1": [3, 2, pd.NA, 0], "col_2": ["a", "b", pd.NA, "d"]})
     return df
 
 
 @pipe_output(
-    step(_filter),
-    step(_add_missing_value, missing_row=value(["c", 145])),
-    step(_join, other_data=source("data_3")),
-    step(_sort),
+    step(filter_),
+    step(add_missing_value, missing_row=value(["c", 145])),
+    step(join, other_data=source("data_3")),
+    step(sort),
 )
 def data_2() -> pd.DataFrame:
     df = pd.DataFrame.from_dict(

diff --git a/examples/mutate/abstract_functionality_blueprint/pipe_output_on_output.py b/examples/mutate/abstract_functionality_blueprint/pipe_output_on_output.py
@@ -0,0 +1,39 @@
+from typing import Dict
+
+from hamilton.function_modifiers import (
+    extract_fields,
+    hamilton_exclude,
+    pipe_output,
+    step,
+)
+
+
+@hamilton_exclude
+def pre_step(something: int) -> int:
+    return something + 10
+
+
+@hamilton_exclude
+def post_step(something: int) -> int:
+    return something + 100
+
+
+@hamilton_exclude
+def something_else(something: int) -> int:
+    return something + 1000
+
+
+def a() -> int:
+    return 10
+
+
+@pipe_output(
+    step(something_else),  # gets applied to all sink nodes
+    step(pre_step).named(name="transform_1").on_output("field_1"),  # only applied to field_1
+    step(post_step)
+    .named(name="transform_2")
+    .on_output(["field_1", "field_3"]),  # applied to field_1 and field_3
+)
+@extract_fields({"field_1": int, "field_2": int, "field_3": int})
+def foo(a: int) -> Dict[str, int]:
+    return {"field_1": 1, "field_2": 2, "field_3": 3}
diff --git a/...act functionality blueprint/procedural.py → ...act_functionality_blueprint/procedural.py b/...act functionality blueprint/procedural.py → ...act_functionality_blueprint/procedural.py
diff --git a/hamilton/function_modifiers/__init__.py b/hamilton/function_modifiers/__init__.py
@@ -29,6 +29,7 @@
 
 # The config decorator
 config = configuration.config
+hamilton_exclude = configuration.hamilton_exclude()
 
 # Dependency Specification
 # Helper functions to specify dependency sources for parameterization

diff --git a/hamilton/function_modifiers/configuration.py b/hamilton/function_modifiers/configuration.py
@@ -255,3 +255,38 @@ def LEAD_LOG_BASS_MODEL_TIMES_TREND(
 
         resolver = ConfigResolver.when_not_in(**key_value_group_pairs)
         return config(resolver, config_used=list(resolver.optional_config))
+
+
+class hamilton_exclude(base.NodeResolver):
+    """Decorator class that excludes a function from the DAG.
+
+    The preferred way to hide functions from the Hamilton DAG is to prefix them with "_". However,
+    in the exceptional case, this decorator is useful for marking helper functions without having to prefix
+    them with "_", so they can be used either inside other nodes or in conjunction with ``step`` or ``apply_to``.
+
+    .. code-block:: python
+
+        @hamilton_exclude
+        def helper(...) -> ...:
+            '''This will not be part of the DAG'''
+            ...
+
+    You may also want to use this decorator for excluding functions in legacy code that would raise
+    an error in Hamilton (for example, because of missing type hints).
+    """
+
+    def __init__(self):
+        pass
+
+    def resolve(self, *args, **kwargs) -> Optional[Callable]:
+        """Return None so that the decorated function is not included in the DAG.
+
+        :param fn: Function to resolve
+        :param config: DAG config
+        :return: None to not be included in the DAG
+        """
+        return None
+
+    def validate(self, fn):
+        """Any function should work."""
+        pass
diff --git a/hamilton/function_modifiers/macros.py b/hamilton/function_modifiers/macros.py
@@ -11,7 +11,7 @@
 from hamilton import models, node
 from hamilton.dev_utils.deprecation import deprecated
 from hamilton.function_modifiers import base
-from hamilton.function_modifiers.configuration import ConfigResolver
+from hamilton.function_modifiers.configuration import ConfigResolver, hamilton_exclude
 from hamilton.function_modifiers.delayed import resolve as delayed_resolve
 from hamilton.function_modifiers.dependencies import (
     LiteralDependency,
@@ -1367,11 +1367,8 @@ def __call__(self, mutating_fn: Callable):
         :return: mutating_fn, to guarantee function works even when Hamilton driver is not used
         """
 
-        # TODO: We want to hide such helper function from the DAG by default, since we are manually
-        # adding them to the DAG in a different place
-        # Suggestion: ignore decorator - https://github.com/DAGWorks-Inc/hamilton/issues/1168
-        # if not mutating_fn.__name__.startswith("_"):
-        #     mutating_fn.__name__ = "".join(("_", mutating_fn.__name__))
+        # This function will be excluded from the DAG as a node since we are inserting it manually
+        mutating_fn = hamilton_exclude()(mutating_fn)
 
         if self.restrict_to_single_module:
             self.validate_same_module(mutating_fn=mutating_fn)

diff --git a/tests/function_modifiers/test_configuration.py b/tests/function_modifiers/test_configuration.py
@@ -111,3 +111,14 @@ def config_fn() -> int:
     config_fn = annotation(config_fn)
     nodes = base.resolve_nodes(config_fn, {})
     assert len(nodes) == 0
+
+
+def test_hamilton_exclude():
+    def fn_to_hamilton_exclude() -> int:
+        pass
+
+    decorator = function_modifiers.hamilton_exclude
+    hidden_fn = decorator(fn_to_hamilton_exclude)
+    nodes = base.resolve_nodes(hidden_fn, {})
+    assert decorator.resolve(fn_to_hamilton_exclude, {"key": "value"}) is None
+    assert len(nodes) == 0