Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Typed dict extract fields #1253

Merged
merged 4 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions hamilton/function_modifiers/expanders.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
import typing
from typing import Any, Callable, Collection, Dict, Tuple, Union

import typing_extensions
import typing_inspect

from hamilton import node, registry
from hamilton import htypes, node, registry
from hamilton.dev_utils import deprecation
from hamilton.function_modifiers import base
from hamilton.function_modifiers.dependencies import (
Expand Down Expand Up @@ -733,7 +734,7 @@ def _validate_extract_fields(fields: dict):
class extract_fields(base.SingleNodeNodeTransformer):
"""Extracts fields from a dictionary of output."""

def __init__(self, fields: dict, fill_with: Any = None):
def __init__(self, fields: dict = None, fill_with: Any = None):
"""Constructor for a modifier that expands a single function into the following nodes:

- n functions, each of which take in the original dict and output a specific field
Expand All @@ -745,7 +746,6 @@ def __init__(self, fields: dict, fill_with: Any = None):
field value.
"""
super(extract_fields, self).__init__()
_validate_extract_fields(fields)
self.fields = fields
self.fill_with = fill_with

Expand All @@ -759,13 +759,32 @@ def validate(self, fn: Callable):
if typing_inspect.is_generic_type(output_type):
base_type = typing_inspect.get_origin(output_type)
if base_type == dict or base_type == Dict:
pass
_validate_extract_fields(self.fields)
else:
raise base.InvalidDecoratorException(
f"For extracting fields, output type must be a dict or typing.Dict, not: {output_type}"
)
elif output_type == dict:
pass
_validate_extract_fields(self.fields)
elif typing_extensions.is_typeddict(output_type):
if self.fields is None:
self.fields = typing.get_type_hints(output_type)
else:
# check that fields is a subset of TypedDict that is defined
typed_dict_fields = typing.get_type_hints(output_type)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for checking if fields is a subset of the TypedDict fields is repeated. Consider refactoring this into a separate function to adhere to the DRY principle.

for field_name, field_type in self.fields.items():
expected_type = typed_dict_fields.get(field_name, None)
if expected_type == field_type:
pass # we're definitely good
elif expected_type is not None and htypes.custom_subclass_check(
field_type, expected_type
):
pass
else:
raise base.InvalidDecoratorException(
f"Error {self.fields} did not match a subset of the TypedDict annotation's fields {typed_dict_fields}."
)
_validate_extract_fields(self.fields)
else:
raise base.InvalidDecoratorException(
f"For extracting fields, output type must be a dict or typing.Dict, not: {output_type}"
Expand Down
70 changes: 68 additions & 2 deletions tests/function_modifiers/test_expanders.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import sys
from typing import Any, Dict, List, Optional, Type
from typing import Any, Dict, List, Optional, Type, TypedDict

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -313,13 +313,23 @@ def test_extract_fields_constructor_happy(fields):
expanders._validate_extract_fields(fields)


class MyDict(TypedDict):
    """TypedDict fixture with an ``int`` field and a ``str`` field.

    Used in this module as the return annotation of functions that are passed
    to ``extract_fields(...).validate``.
    """

    test: int
    test2: str


class MyDictBad(TypedDict):
    """TypedDict fixture that declares only the ``test2`` field.

    Listed among the return types that ``test_extract_fields_validate_errors``
    expects validation to reject.
    """

    test2: str


@pytest.mark.parametrize(
"return_type",
[
dict,
Dict,
Dict[str, str],
Dict[str, Any],
MyDict,
],
)
def test_extract_fields_validate_happy(return_type):
Expand All @@ -330,7 +340,45 @@ def return_dict() -> return_type:
annotation.validate(return_dict)


@pytest.mark.parametrize("return_type", [(int), (list), (np.ndarray), (pd.DataFrame)])
class SomeObject:
    """Empty base class used as a field type by the TypedDict fixtures."""


class InheritedObject(SomeObject):
    """Empty subclass of ``SomeObject`` used to exercise subclass-aware field checks."""


class MyDictInheritance(TypedDict):
    """TypedDict fixture whose ``test`` field is annotated with the base class ``SomeObject``."""

    test: SomeObject
    test2: str


class MyDictInheritanceBadCase(TypedDict):
    """TypedDict fixture whose ``test`` field is annotated with the subclass ``InheritedObject``."""

    test: InheritedObject
    test2: str


def test_extract_fields_validate_happy_inheritance():
    """Validation passes when the requested field type subclasses the TypedDict's declared type.

    ``MyDictInheritance`` declares ``test: SomeObject``; the decorator asks for
    ``InheritedObject``, which is a subclass of it.
    """

    def return_dict() -> MyDictInheritance:
        # Only the return annotation is under test; the function is never called here.
        return {}

    annotation = function_modifiers.extract_fields({"test": InheritedObject})
    annotation.validate(return_dict)


def test_extract_fields_validate_not_subclass():
    """Validation fails when the requested field type is a superclass of the declared type.

    ``MyDictInheritanceBadCase`` declares ``test: InheritedObject``; the
    decorator asks for the base class ``SomeObject``, so
    ``InvalidDecoratorException`` is expected.
    """

    def return_dict() -> MyDictInheritanceBadCase:
        # Only the return annotation is under test; the function is never called here.
        return {}

    annotation = function_modifiers.extract_fields({"test": SomeObject})
    with pytest.raises(base.InvalidDecoratorException):
        annotation.validate(return_dict)


@pytest.mark.parametrize(
"return_type",
[(int), (list), (np.ndarray), (pd.DataFrame), (MyDictBad)],
)
def test_extract_fields_validate_errors(return_type):
def return_dict() -> return_type:
return {}
Expand All @@ -340,6 +388,24 @@ def return_dict() -> return_type:
annotation.validate(return_dict)


def test_extract_fields_typeddict_empty_fields():
    """``extract_fields()`` with no arguments validates against a TypedDict-annotated function."""

    def return_dict() -> MyDict:
        # Only the return annotation is under test; the function is never called here.
        return {}

    # don't need fields for TypedDict
    annotation = function_modifiers.extract_fields()
    annotation.validate(return_dict)


def test_extract_fields_typeddict_subset():
    """Requesting only some of the TypedDict's fields passes validation."""

    def return_dict() -> MyDict:
        # Only the return annotation is under test; the function is never called here.
        return {}

    # test that a subset of fields is fine
    annotation = function_modifiers.extract_fields({"test2": str})
    annotation.validate(return_dict)


def test_valid_extract_fields():
"""Tests whole extract_fields decorator."""
annotation = function_modifiers.extract_fields(
Expand Down