diff --git a/Changelog.md b/Changelog.md index c835ae7..c3f30c4 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,8 +1,14 @@ # Changelog +### 0.4.0 (2021-MM-DD) + +Added `ItemAdapter.is_item_class` and `ItemAdapter.get_field_meta_from_class` +([#54](https://github.com/scrapy/itemadapter/pull/54)) + + ### 0.3.0 (2021-07-15) -Added suport for `pydantic` models ([#53](https://github.com/scrapy/itemadapter/pull/53)) +Added built-in support for `pydantic` models ([#53](https://github.com/scrapy/itemadapter/pull/53)) ### 0.2.0 (2020-11-06) diff --git a/README.md b/README.md index 552f939..8e4bbf4 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ Consider the following type definition: >>> ``` -The `ItemAdapter` object can be treated much like a dictionary: +An `ItemAdapter` object can be treated much like a dictionary: ```python >>> obj = InventoryItem(name='foo', price=20.5, stock=10) @@ -176,7 +176,13 @@ Return `True` if any of the registed adapters can handle the item (i.e. if any of them returns `True` for its `is_item` method with `item` as argument), `False` otherwise. -#### `get_field_meta(field_name: str) -> MappingProxyType` +#### class method `is_item_class(item_class: type) -> bool` + +Return `True` if any of the registered adapters can handle the item class +(i.e. if any of them returns `True` for its `is_item_class` method with +`item_class` as argument), `False` otherwise. + +#### class method `get_field_meta_from_class(item_class: type, field_name: str) -> MappingProxyType` Return a [`types.MappingProxyType`](https://docs.python.org/3/library/types.html#types.MappingProxyType) object, which is a read-only mapping with metadata about the given field. 
If the item class does not @@ -185,12 +191,18 @@ support field metadata, or there is no metadata for the given field, an empty ob The returned value is taken from the following sources, depending on the item type: * [`scrapy.item.Field`](https://docs.scrapy.org/en/latest/topics/items.html#item-fields) - for `scrapy.item.Item`s + for `scrapy.item.Item`s * [`dataclasses.field.metadata`](https://docs.python.org/3/library/dataclasses.html#dataclasses.field) for `dataclass`-based items * [`attr.Attribute.metadata`](https://www.attrs.org/en/stable/examples.html#metadata) for `attrs`-based items - * [`pydantic.fields.FieldInfo`](https://pydantic-docs.helpmanual.io/usage/schema/#field-customisation) for `pydantic`-based items + * [`pydantic.fields.FieldInfo`](https://pydantic-docs.helpmanual.io/usage/schema/#field-customisation) + for `pydantic`-based items + +#### `get_field_meta(field_name: str) -> MappingProxyType` + +Return metadata for the given field, if available. Unless overridden in a custom adapter class, by default +this method calls the adapter's `get_field_meta_from_class` method, passing the wrapped item's class. #### `field_names() -> collections.abc.KeysView` @@ -211,10 +223,7 @@ Return `True` if the given object belongs to (at least) one of the supported typ ### function `itemadapter.utils.get_field_meta_from_class(item_class: type, field_name: str) -> types.MappingProxyType` -Given an item class and a field name, return a -[`MappingProxyType`](https://docs.python.org/3/library/types.html#types.MappingProxyType) -object, which is a read-only mapping with metadata about the given field. If the item class does not -support field metadata, or there is no metadata for the given field, an empty object is returned. 
+Alias for `itemadapter.adapter.ItemAdapter.get_field_meta_from_class` --- @@ -223,10 +232,12 @@ support field metadata, or there is no metadata for the given field, an empty ob `scrapy.item.Item`, `dataclass`, `attrs`, and `pydantic` objects allow the definition of arbitrary field metadata. This can be accessed through a [`MappingProxyType`](https://docs.python.org/3/library/types.html#types.MappingProxyType) -object, which can be retrieved from an item instance with the -`itemadapter.adapter.ItemAdapter.get_field_meta` method, or from an item class -with the `itemadapter.utils.get_field_meta_from_class` function. -The definition procedure depends on the underlying type. +object, which can be retrieved from an item instance with +`itemadapter.adapter.ItemAdapter.get_field_meta`, or from an item class +with the `itemadapter.adapter.ItemAdapter.get_field_meta_from_class` +method (or its alias `itemadapter.utils.get_field_meta_from_class`). +The source of the data depends on the underlying type (see the docs for +`ItemAdapter.get_field_meta_from_class`). #### `scrapy.item.Item` objects @@ -306,19 +317,32 @@ _class `itemadapter.adapter.AdapterInterface(item: Any)`_ Abstract Base Class for adapters. An adapter that handles a specific type of item must inherit from this class and implement the abstract methods defined on it. `AdapterInterface` inherits from [`collections.abc.MutableMapping`](https://docs.python.org/3/library/collections.abc.html#collections.abc.MutableMapping), -so all methods from the `MutableMapping` class must be implemented as well. +so all methods from the `MutableMapping` interface must be implemented as well. + +* _class method `is_item_class(cls, item_class: type) -> bool`_ + + Return `True` if the adapter can handle the given item class, `False` otherwise. Abstract (mandatory). * _class method `is_item(cls, item: Any) -> bool`_ - Return `True` if the adapter can handle the given item, `False` otherwise. Abstract (mandatory). 
+ Return `True` if the adapter can handle the given item, `False` otherwise. + The default implementation calls `cls.is_item_class(item.__class__)`. -* _method `get_field_meta(self, field_name: str) -> types.MappingProxyType`_ +* _class method `get_field_meta_from_class(cls, item_class: type, field_name: str) -> types.MappingProxyType`_ - Return metadata for the given field name, if available. + Return metadata for the given item class and field name, if available. By default, this method returns an empty `MappingProxyType` object. Please supply your own method definition if you want to handle field metadata based on custom logic. See the [section on metadata support](#metadata-support) for additional information. +* _method `get_field_meta(self, field_name: str) -> types.MappingProxyType`_ + + Return metadata for the given field name, if available. It's usually not necessary to + override this method, since the `itemadapter.adapter.AdapterInterface` base class + provides a default implementation that calls the adapter's `get_field_meta_from_class` class method + with the wrapped item's class as argument. + See the [section on metadata support](#metadata-support) for additional information. 
+ * _method `field_names(self) -> collections.abc.KeysView`_: Return a [dynamic view](https://docs.python.org/3/library/collections.abc.html#collections.abc.KeysView) diff --git a/itemadapter/adapter.py b/itemadapter/adapter.py index cdfb279..0d8895b 100644 --- a/itemadapter/adapter.py +++ b/itemadapter/adapter.py @@ -6,9 +6,12 @@ from itemadapter.utils import ( _get_pydantic_model_metadata, + _get_scrapy_item_classes, + _is_attrs_class, + _is_dataclass, + _is_pydantic_model, is_attrs_instance, is_dataclass_instance, - is_item, is_pydantic_instance, is_scrapy_item, ) @@ -38,13 +41,22 @@ def __init__(self, item: Any) -> None: @classmethod @abstractmethod - def is_item(cls, item: Any) -> bool: - """Return True if the adapter can handle the given item, False otherwise""" + def is_item_class(cls, item_class: type) -> bool: + """Return True if the adapter can handle the given item class, False otherwise.""" raise NotImplementedError() + @classmethod + def is_item(cls, item: Any) -> bool: + """Return True if the adapter can handle the given item, False otherwise.""" + return cls.is_item_class(item.__class__) + + @classmethod + def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: + return MappingProxyType({}) + def get_field_meta(self, field_name: str) -> MappingProxyType: """Return metadata for the given field name, if available.""" - return MappingProxyType({}) + return self.get_field_meta_from_class(self.item.__class__, field_name) def field_names(self) -> KeysView: """Return a dynamic view of the item's field names.""" @@ -101,6 +113,19 @@ def __init__(self, item: Any) -> None: def is_item(cls, item: Any) -> bool: return is_attrs_instance(item) + @classmethod + def is_item_class(cls, item_class: type) -> bool: + return _is_attrs_class(item_class) + + @classmethod + def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: + from attr import fields_dict + + try: + return 
fields_dict(item_class)[field_name].metadata # type: ignore + except KeyError: + raise KeyError(f"{item_class.__name__} does not support field: {field_name}") + class DataclassAdapter(_MixinAttrsDataclassAdapter, AdapterInterface): def __init__(self, item: Any) -> None: @@ -114,18 +139,39 @@ def __init__(self, item: Any) -> None: def is_item(cls, item: Any) -> bool: return is_dataclass_instance(item) + @classmethod + def is_item_class(cls, item_class: type) -> bool: + return _is_dataclass(item_class) + + @classmethod + def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: + from dataclasses import fields + + for field in fields(item_class): + if field.name == field_name: + return field.metadata # type: ignore + raise KeyError(f"{item_class.__name__} does not support field: {field_name}") + class PydanticAdapter(AdapterInterface): item: Any - def get_field_meta(self, field_name: str) -> MappingProxyType: - return _get_pydantic_model_metadata(type(self.item), field_name) - @classmethod def is_item(cls, item: Any) -> bool: return is_pydantic_instance(item) + @classmethod + def is_item_class(cls, item_class: type) -> bool: + return _is_pydantic_model(item_class) + + @classmethod + def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: + try: + return _get_pydantic_model_metadata(item_class, field_name) + except KeyError: + raise KeyError(f"{item_class.__name__} does not support field: {field_name}") + def field_names(self) -> KeysView: return KeysView(self.item.__fields__) @@ -179,11 +225,8 @@ def __len__(self) -> int: class DictAdapter(_MixinDictScrapyItemAdapter, AdapterInterface): @classmethod - def is_item(cls, item: Any) -> bool: - return isinstance(item, dict) - - def get_field_meta(self, field_name: str) -> MappingProxyType: - return MappingProxyType({}) + def is_item_class(cls, item_class: type) -> bool: + return issubclass(item_class, dict) def field_names(self) -> KeysView: return 
KeysView(self.item) @@ -194,8 +237,13 @@ class ScrapyItemAdapter(_MixinDictScrapyItemAdapter, AdapterInterface): def is_item(cls, item: Any) -> bool: return is_scrapy_item(item) - def get_field_meta(self, field_name: str) -> MappingProxyType: - return MappingProxyType(self.item.fields[field_name]) + @classmethod + def is_item_class(cls, item_class: type) -> bool: + return issubclass(item_class, _get_scrapy_item_classes()) + + @classmethod + def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: + return MappingProxyType(item_class.fields[field_name]) # type: ignore def field_names(self) -> KeysView: return KeysView(self.item.fields) @@ -228,12 +276,25 @@ def __init__(self, item: Any) -> None: def is_item(cls, item: Any) -> bool: return any(adapter_class.is_item(item) for adapter_class in cls.ADAPTER_CLASSES) + @classmethod + def is_item_class(cls, item_class: type) -> bool: + return any( + adapter_class.is_item_class(item_class) for adapter_class in cls.ADAPTER_CLASSES + ) + + @classmethod + def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: + for adapter_class in cls.ADAPTER_CLASSES: + if adapter_class.is_item_class(item_class): + return adapter_class.get_field_meta_from_class(item_class, field_name) + raise TypeError(f"{item_class} is not a valid item class") + @property def item(self) -> Any: return self.adapter.item def __repr__(self) -> str: - values = ", ".join(["%s=%r" % (key, value) for key, value in self.items()]) + values = ", ".join([f"{key}={value!r}" for key, value in self.items()]) return f"" def __getitem__(self, field_name: str) -> Any: @@ -252,18 +313,7 @@ def __len__(self) -> int: return self.adapter.__len__() def get_field_meta(self, field_name: str) -> MappingProxyType: - """Return a read-only mapping with metadata for the given field name. If there is no metadata - for the field, or the wrapped item does not support field metadata, an empty object is - returned. 
- - Field metadata is taken from different sources, depending on the item type: - * scrapy.item.Item: corresponding scrapy.item.Field object - * dataclass items: "metadata" attribute for the corresponding field - * attrs items: "metadata" attribute for the corresponding field - - The returned value is an instance of types.MappingProxyType, i.e. a dynamic read-only view - of the original mapping, which gets automatically updated if the original mapping changes. - """ + """Return metadata for the given field name.""" return self.adapter.get_field_meta(field_name) def field_names(self) -> KeysView: @@ -285,7 +335,7 @@ def _asdict(obj: Any) -> Any: return obj.__class__(_asdict(x) for x in obj) elif isinstance(obj, ItemAdapter): return obj.asdict() - elif is_item(obj): + elif ItemAdapter.is_item(obj): return ItemAdapter(obj).asdict() else: return obj diff --git a/itemadapter/utils.py b/itemadapter/utils.py index 37dd19c..342ed39 100644 --- a/itemadapter/utils.py +++ b/itemadapter/utils.py @@ -129,28 +129,7 @@ def get_field_meta_from_class(item_class: type, field_name: str) -> MappingProxy The returned value is an instance of types.MappingProxyType, i.e. a dynamic read-only view of the original mapping, which gets automatically updated if the original mapping changes. 
""" - if issubclass(item_class, _get_scrapy_item_classes()): - return MappingProxyType(item_class.fields[field_name]) # type: ignore - elif _is_dataclass(item_class): - from dataclasses import fields - - for field in fields(item_class): - if field.name == field_name: - return field.metadata # type: ignore - raise KeyError("%s does not support field: %s" % (item_class.__name__, field_name)) - elif _is_attrs_class(item_class): - from attr import fields_dict - try: - return fields_dict(item_class)[field_name].metadata # type: ignore - except KeyError: - raise KeyError("%s does not support field: %s" % (item_class.__name__, field_name)) - elif _is_pydantic_model(item_class): - try: - return _get_pydantic_model_metadata(item_class, field_name) - except KeyError: - raise KeyError("%s does not support field: %s" % (item_class.__name__, field_name)) - elif issubclass(item_class, dict): - return MappingProxyType({}) - else: - raise TypeError("%s is not a valid item class" % (item_class,)) + from itemadapter.adapter import ItemAdapter + + return ItemAdapter.get_field_meta_from_class(item_class, field_name) diff --git a/tests/test_interface.py b/tests/test_interface.py index 4e4e849..21c0783 100644 --- a/tests/test_interface.py +++ b/tests/test_interface.py @@ -11,6 +11,8 @@ class AdapterInterfaceTest(unittest.TestCase): def test_interface_class_methods(self): with self.assertRaises(NotImplementedError): AdapterInterface.is_item(object()) + with self.assertRaises(NotImplementedError): + AdapterInterface.is_item_class(object) class FakeItemClass: @@ -27,8 +29,8 @@ class BaseFakeItemAdapter(AdapterInterface): """An adapter that only implements the required methods.""" @classmethod - def is_item(cls, item: Any) -> bool: - return isinstance(item, FakeItemClass) + def is_item_class(cls, item_class: type) -> bool: + return issubclass(item_class, FakeItemClass) def __getitem__(self, field_name: str) -> Any: if field_name in self.item._fields: @@ -63,13 +65,11 @@ def 
field_names(self) -> KeysView: class MetadataFakeItemAdapter(BaseFakeItemAdapter): - """An adapter that also implements the get_field_meta method.""" + """An adapter that also implements metadata-related methods.""" - def get_field_meta(self, field_name: str) -> MappingProxyType: - if field_name in self.item._fields: - return MappingProxyType(self.item._fields[field_name]) - else: - return super().get_field_meta(field_name) + @classmethod + def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: + return MappingProxyType(item_class._fields.get(field_name) or {}) class BaseFakeItemAdapterTest(unittest.TestCase): @@ -155,13 +155,26 @@ def test_get_value_keyerror_item_dict(self): with self.assertRaises(KeyError): adapter["name"] - def test_get_field_meta_defined_fields(self): + def test_get_field_meta(self): """Metadata is always empty for the default implementation.""" adapter = ItemAdapter(self.item_class()) self.assertEqual(adapter.get_field_meta("_undefined_"), MappingProxyType({})) self.assertEqual(adapter.get_field_meta("name"), MappingProxyType({})) self.assertEqual(adapter.get_field_meta("value"), MappingProxyType({})) + def test_get_field_meta_from_class(self): + """Metadata is always empty for the default implementation.""" + self.assertEqual( + ItemAdapter.get_field_meta_from_class(self.item_class, "_undefined_"), + MappingProxyType({}), + ) + self.assertEqual( + ItemAdapter.get_field_meta_from_class(self.item_class, "name"), MappingProxyType({}) + ) + self.assertEqual( + ItemAdapter.get_field_meta_from_class(self.item_class, "value"), MappingProxyType({}) + ) + def test_field_names(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) @@ -174,12 +187,26 @@ class MetadataFakeItemAdapterTest(BaseFakeItemAdapterTest): item_class = FakeItemClass adapter_class = MetadataFakeItemAdapter - def test_get_field_meta_defined_fields(self): + def test_get_field_meta(self): adapter = 
ItemAdapter(self.item_class()) self.assertEqual(adapter.get_field_meta("_undefined_"), MappingProxyType({})) self.assertEqual(adapter.get_field_meta("name"), MappingProxyType({"serializer": str})) self.assertEqual(adapter.get_field_meta("value"), MappingProxyType({"serializer": int})) + def test_get_field_meta_from_class(self): + self.assertEqual( + ItemAdapter.get_field_meta_from_class(self.item_class, "_undefined_"), + MappingProxyType({}), + ) + self.assertEqual( + ItemAdapter.get_field_meta_from_class(self.item_class, "name"), + MappingProxyType({"serializer": str}), + ) + self.assertEqual( + ItemAdapter.get_field_meta_from_class(self.item_class, "value"), + MappingProxyType({"serializer": int}), + ) + class FieldNamesFakeItemAdapterTest(BaseFakeItemAdapterTest): diff --git a/tests/test_utils.py b/tests/test_utils.py index 7a5609f..136e3a7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,3 +1,4 @@ +import importlib import unittest from unittest import mock from types import MappingProxyType @@ -10,6 +11,7 @@ is_pydantic_instance, is_scrapy_item, ) +from itemadapter import ItemAdapter from tests import ( AttrsItem, @@ -22,6 +24,9 @@ def mocked_import(name, *args, **kwargs): + """Allow only internal itemadapter imports.""" + if name.split(".")[0] == "itemadapter": + return importlib.__import__(name, *args, **kwargs) raise ImportError(name) @@ -57,26 +62,35 @@ def test_false(self): self.assertFalse(is_item(ScrapySubclassedItem)) self.assertFalse(is_item(AttrsItem)) self.assertFalse(is_item(PydanticModel)) + self.assertFalse(ItemAdapter.is_item_class(list)) + self.assertFalse(ItemAdapter.is_item_class(int)) + self.assertFalse(ItemAdapter.is_item_class(tuple)) def test_true_dict(self): self.assertTrue(is_item({"a": "dict"})) + self.assertTrue(ItemAdapter.is_item_class(dict)) @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available") def test_true_scrapy(self): self.assertTrue(is_item(ScrapyItem())) 
self.assertTrue(is_item(ScrapySubclassedItem(name="asdf", value=1234))) + self.assertTrue(ItemAdapter.is_item_class(ScrapyItem)) + self.assertTrue(ItemAdapter.is_item_class(ScrapySubclassedItem)) @unittest.skipIf(not DataClassItem, "dataclasses module is not available") def test_true_dataclass(self): self.assertTrue(is_item(DataClassItem(name="asdf", value=1234))) + self.assertTrue(ItemAdapter.is_item_class(DataClassItem)) @unittest.skipIf(not AttrsItem, "attrs module is not available") def test_true_attrs(self): self.assertTrue(is_item(AttrsItem(name="asdf", value=1234))) + self.assertTrue(ItemAdapter.is_item_class(AttrsItem)) @unittest.skipIf(not PydanticModel, "pydantic module is not available") def test_true_pydantic(self): self.assertTrue(is_item(PydanticModel(name="asdf", value=1234))) + self.assertTrue(ItemAdapter.is_item_class(PydanticModel)) class AttrsTestCase(unittest.TestCase):