Adds typed dict extract fields support (#1253)

Adds TypedDict support for `extract_fields`.

This enables more ergonomic usage of `TypedDict` with `extract_fields`. I skipped adding support for returning any `Mapping`, though that should be an easy addition.

```python
from typing import TypedDict
from hamilton.function_modifiers import extract_fields

class MyDict(TypedDict):
    foo: str
    bar: int

@extract_fields()
def some_function() -> MyDict:
    return MyDict(foo="s", bar=1)
```

The above will automatically extract the fields `foo` and `bar`. You can also do:

```python
from typing import TypedDict
from hamilton.function_modifiers import extract_fields

class MyDict(TypedDict):
    foo: str
    bar: int

@extract_fields({"foo": str})
def some_function() -> MyDict:
    return MyDict(foo="s", bar=1)
```

to expose only a subset of the fields.

Squashed commits:

* Adds sketch of improving `extract_fields` with TypedDict

  This is in response to #1252. We should be able to handle TypedDict better. This sketches some ideas:

  1. Field validation should happen in `.validate()`, not the constructor.
  2. `extract_fields` shouldn't need `fields` if the TypedDict is the annotation type.
  3. We properly check that a TypedDict can be a return type.

* Adds TypedDict tests
* Adding validation to cover all `extract_fields` paths
* Adds TypedDict `extract_fields` subclass type check and test for it
DAGWorks-Inc · Dec 12, 2024 · 622866a · 622866a
1 parent fc239a9
commit 622866a
Show file tree

Hide file tree

Showing 2 changed files with 92 additions and 7 deletions.
diff --git a/hamilton/function_modifiers/expanders.py b/hamilton/function_modifiers/expanders.py
@@ -5,9 +5,10 @@
 import typing
 from typing import Any, Callable, Collection, Dict, Tuple, Union
 
+import typing_extensions
 import typing_inspect
 
-from hamilton import node, registry
+from hamilton import htypes, node, registry
 from hamilton.dev_utils import deprecation
 from hamilton.function_modifiers import base
 from hamilton.function_modifiers.dependencies import (
@@ -733,7 +734,7 @@ def _validate_extract_fields(fields: dict):
 class extract_fields(base.SingleNodeNodeTransformer):
     """Extracts fields from a dictionary of output."""
 
-    def __init__(self, fields: dict, fill_with: Any = None):
+    def __init__(self, fields: dict = None, fill_with: Any = None):
         """Constructor for a modifier that expands a single function into the following nodes:
 
         - n functions, each of which take in the original dict and output a specific field
@@ -745,7 +746,6 @@ def __init__(self, fields: dict, fill_with: Any = None):
         field value.
         """
         super(extract_fields, self).__init__()
-        _validate_extract_fields(fields)
         self.fields = fields
         self.fill_with = fill_with
 
@@ -759,13 +759,32 @@ def validate(self, fn: Callable):
         if typing_inspect.is_generic_type(output_type):
             base_type = typing_inspect.get_origin(output_type)
             if base_type == dict or base_type == Dict:
-                pass
+                _validate_extract_fields(self.fields)
             else:
                 raise base.InvalidDecoratorException(
                     f"For extracting fields, output type must be a dict or typing.Dict, not: {output_type}"
                 )
         elif output_type == dict:
-            pass
+            _validate_extract_fields(self.fields)
+        elif typing_extensions.is_typeddict(output_type):
+            if self.fields is None:
+                self.fields = typing.get_type_hints(output_type)
+            else:
+                # check that fields is a subset of TypedDict that is defined
+                typed_dict_fields = typing.get_type_hints(output_type)
+                for field_name, field_type in self.fields.items():
+                    expected_type = typed_dict_fields.get(field_name, None)
+                    if expected_type == field_type:
+                        pass  # we're definitely good
+                    elif expected_type is not None and htypes.custom_subclass_check(
+                        field_type, expected_type
+                    ):
+                        pass
+                    else:
+                        raise base.InvalidDecoratorException(
+                            f"Error {self.fields} did not match a subset of the TypedDict annotation's fields {typed_dict_fields}."
+                        )
+            _validate_extract_fields(self.fields)
         else:
             raise base.InvalidDecoratorException(
                 f"For extracting fields, output type must be a dict or typing.Dict, not: {output_type}"

diff --git a/tests/function_modifiers/test_expanders.py b/tests/function_modifiers/test_expanders.py
@@ -1,5 +1,5 @@
 import sys
-from typing import Any, Dict, List, Optional, Type
+from typing import Any, Dict, List, Optional, Type, TypedDict
 
 import numpy as np
 import pandas as pd
@@ -313,13 +313,23 @@ def test_extract_fields_constructor_happy(fields):
     expanders._validate_extract_fields(fields)
 
 
+class MyDict(TypedDict):
+    test: int
+    test2: str
+
+
+class MyDictBad(TypedDict):
+    test2: str
+
+
 @pytest.mark.parametrize(
     "return_type",
     [
         dict,
         Dict,
         Dict[str, str],
         Dict[str, Any],
+        MyDict,
     ],
 )
 def test_extract_fields_validate_happy(return_type):
@@ -330,7 +340,45 @@ def return_dict() -> return_type:
     annotation.validate(return_dict)
 
 
-@pytest.mark.parametrize("return_type", [(int), (list), (np.ndarray), (pd.DataFrame)])
+class SomeObject:
+    pass
+
+
+class InheritedObject(SomeObject):
+    pass
+
+
+class MyDictInheritance(TypedDict):
+    test: SomeObject
+    test2: str
+
+
+class MyDictInheritanceBadCase(TypedDict):
+    test: InheritedObject
+    test2: str
+
+
+def test_extract_fields_validate_happy_inheritance():
+    def return_dict() -> MyDictInheritance:
+        return {}
+
+    annotation = function_modifiers.extract_fields({"test": InheritedObject})
+    annotation.validate(return_dict)
+
+
+def test_extract_fields_validate_not_subclass():
+    def return_dict() -> MyDictInheritanceBadCase:
+        return {}
+
+    annotation = function_modifiers.extract_fields({"test": SomeObject})
+    with pytest.raises(base.InvalidDecoratorException):
+        annotation.validate(return_dict)
+
+
+@pytest.mark.parametrize(
+    "return_type",
+    [(int), (list), (np.ndarray), (pd.DataFrame), (MyDictBad)],
+)
 def test_extract_fields_validate_errors(return_type):
     def return_dict() -> return_type:
         return {}
@@ -340,6 +388,24 @@ def return_dict() -> return_type:
         annotation.validate(return_dict)
 
 
+def test_extract_fields_typeddict_empty_fields():
+    def return_dict() -> MyDict:
+        return {}
+
+    # don't need fields for TypedDict
+    annotation = function_modifiers.extract_fields()
+    annotation.validate(return_dict)
+
+
+def test_extract_fields_typeddict_subset():
+    def return_dict() -> MyDict:
+        return {}
+
+    # test that a subset of fields is fine
+    annotation = function_modifiers.extract_fields({"test2": str})
+    annotation.validate(return_dict)
+
+
 def test_valid_extract_fields():
     """Tests whole extract_fields decorator."""
     annotation = function_modifiers.extract_fields(