Skip to content

Commit

Permalink
Adds typed dict extract fields support (#1253)
Browse files Browse the repository at this point in the history
Adds typeddict support for extract_fields

This enables more ergonomic usage of TypedDict with extract fields.

I skipped adding support for returning an arbitrary `Mapping`, though that should be an easy addition.

```python
from typing import TypedDict
from hamilton.function_modifiers import extract_fields

class MyDict(TypedDict):
    foo: str
    bar: int

@extract_fields()
def some_function() -> MyDict:
    return MyDict(foo="s", bar=1)
```
The above will automatically extract the fields `foo` and `bar`.

You can also do:

```python
from typing import TypedDict
from hamilton.function_modifiers import extract_fields

class MyDict(TypedDict):
    foo: str
    bar: int

@extract_fields({"foo": str})
def some_function()->MyDict:
    return MyDict(foo="s", bar=1)
```
This exposes only a subset of the fields (here, just `foo`).


Squashed commits:

* Adds sketch of improving extract_fields with typeddict

This is in response to #1252.

We should be able to handle typeddict better.

This sketches some ideas:

1. field validation should happen in .validate() not the constructor.
2. extract_fields shouldn't need fields if the typeddict is the annotation type.
3. we properly check that typeddict can be a return type.

* Adds typeddict tests

* Adding validation to cover all extract_fields paths

* Adds a subclass type check for TypedDict fields in extract_fields, and a test for it
  • Loading branch information
skrawcz authored Dec 12, 2024
1 parent fc239a9 commit 622866a
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 7 deletions.
29 changes: 24 additions & 5 deletions hamilton/function_modifiers/expanders.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
import typing
from typing import Any, Callable, Collection, Dict, Tuple, Union

import typing_extensions
import typing_inspect

from hamilton import node, registry
from hamilton import htypes, node, registry
from hamilton.dev_utils import deprecation
from hamilton.function_modifiers import base
from hamilton.function_modifiers.dependencies import (
Expand Down Expand Up @@ -733,7 +734,7 @@ def _validate_extract_fields(fields: dict):
class extract_fields(base.SingleNodeNodeTransformer):
"""Extracts fields from a dictionary of output."""

def __init__(self, fields: dict, fill_with: Any = None):
def __init__(self, fields: dict = None, fill_with: Any = None):
"""Constructor for a modifier that expands a single function into the following nodes:
- n functions, each of which take in the original dict and output a specific field
Expand All @@ -745,7 +746,6 @@ def __init__(self, fields: dict, fill_with: Any = None):
field value.
"""
super(extract_fields, self).__init__()
_validate_extract_fields(fields)
self.fields = fields
self.fill_with = fill_with

Expand All @@ -759,13 +759,32 @@ def validate(self, fn: Callable):
if typing_inspect.is_generic_type(output_type):
base_type = typing_inspect.get_origin(output_type)
if base_type == dict or base_type == Dict:
pass
_validate_extract_fields(self.fields)
else:
raise base.InvalidDecoratorException(
f"For extracting fields, output type must be a dict or typing.Dict, not: {output_type}"
)
elif output_type == dict:
pass
_validate_extract_fields(self.fields)
elif typing_extensions.is_typeddict(output_type):
if self.fields is None:
self.fields = typing.get_type_hints(output_type)
else:
# check that fields is a subset of TypedDict that is defined
typed_dict_fields = typing.get_type_hints(output_type)
for field_name, field_type in self.fields.items():
expected_type = typed_dict_fields.get(field_name, None)
if expected_type == field_type:
pass # we're definitely good
elif expected_type is not None and htypes.custom_subclass_check(
field_type, expected_type
):
pass
else:
raise base.InvalidDecoratorException(
f"Error {self.fields} did not match a subset of the TypedDict annotation's fields {typed_dict_fields}."
)
_validate_extract_fields(self.fields)
else:
raise base.InvalidDecoratorException(
f"For extracting fields, output type must be a dict or typing.Dict, not: {output_type}"
Expand Down
70 changes: 68 additions & 2 deletions tests/function_modifiers/test_expanders.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import sys
from typing import Any, Dict, List, Optional, Type
from typing import Any, Dict, List, Optional, Type, TypedDict

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -313,13 +313,23 @@ def test_extract_fields_constructor_happy(fields):
expanders._validate_extract_fields(fields)


class MyDict(TypedDict):
test: int
test2: str


class MyDictBad(TypedDict):
test2: str


@pytest.mark.parametrize(
"return_type",
[
dict,
Dict,
Dict[str, str],
Dict[str, Any],
MyDict,
],
)
def test_extract_fields_validate_happy(return_type):
Expand All @@ -330,7 +340,45 @@ def return_dict() -> return_type:
annotation.validate(return_dict)


@pytest.mark.parametrize("return_type", [(int), (list), (np.ndarray), (pd.DataFrame)])
class SomeObject:
pass


class InheritedObject(SomeObject):
pass


class MyDictInheritance(TypedDict):
test: SomeObject
test2: str


class MyDictInheritanceBadCase(TypedDict):
test: InheritedObject
test2: str


def test_extract_fields_validate_happy_inheritance():
def return_dict() -> MyDictInheritance:
return {}

annotation = function_modifiers.extract_fields({"test": InheritedObject})
annotation.validate(return_dict)


def test_extract_fields_validate_not_subclass():
def return_dict() -> MyDictInheritanceBadCase:
return {}

annotation = function_modifiers.extract_fields({"test": SomeObject})
with pytest.raises(base.InvalidDecoratorException):
annotation.validate(return_dict)


@pytest.mark.parametrize(
"return_type",
[(int), (list), (np.ndarray), (pd.DataFrame), (MyDictBad)],
)
def test_extract_fields_validate_errors(return_type):
def return_dict() -> return_type:
return {}
Expand All @@ -340,6 +388,24 @@ def return_dict() -> return_type:
annotation.validate(return_dict)


def test_extract_fields_typeddict_empty_fields():
def return_dict() -> MyDict:
return {}

# don't need fields for TypedDict
annotation = function_modifiers.extract_fields()
annotation.validate(return_dict)


def test_extract_fields_typeddict_subset():
def return_dict() -> MyDict:
return {}

# test that a subset of fields is fine
annotation = function_modifiers.extract_fields({"test2": str})
annotation.validate(return_dict)


def test_valid_extract_fields():
"""Tests whole extract_fields decorator."""
annotation = function_modifiers.extract_fields(
Expand Down

0 comments on commit 622866a

Please sign in to comment.