Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

introduce PageObjectRegistry with @handle_urls annotations #27

Merged
merged 14 commits into from
Apr 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ Changelog
TBR
------------------

* added a ``PageObjectRegistry`` class which has the ``handle_urls`` decorator
to conveniently declare and collect ``OverrideRule``.
* removed support for Python 3.6
* added support for Python 3.10
* Backward Incompatible Change:
Expand All @@ -13,7 +15,6 @@ TBR
specific attribute types like ``HttpResponseBody`` and
``HttpResponseHeaders``.


0.1.1 (2021-06-02)
------------------

Expand Down
17 changes: 17 additions & 0 deletions docs/api_reference.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.. _`api-reference`:

=============
API Reference
=============
Expand Down Expand Up @@ -45,3 +47,18 @@ Mixins
.. autoclass:: web_poet.mixins.ResponseShortcutsMixin
:members:
:no-special-members:


.. _`api-overrides`:

Overrides
=========

See the tutorial section on :ref:`intro-overrides` for more context about its
use cases and some examples.

.. autofunction:: web_poet.handle_urls

.. automodule:: web_poet.overrides
:members:
:exclude-members: handle_urls
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,5 +192,6 @@
# Sphinx intersphinx targets: project name -> (base URL, inventory location).
# ``None`` as the inventory location lets Sphinx look up ``objects.inv``
# under the base URL.
intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None),
    'scrapy': ('https://docs.scrapy.org/en/latest', None),
    'url-matcher': ('https://url-matcher.readthedocs.io/en/stable/', None),
    'parsel': ('https://parsel.readthedocs.io/en/latest/', None),
}
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ and the motivation behind ``web-poet``, start with :ref:`from-ground-up`.

intro/tutorial
intro/from-ground-up
intro/overrides

.. toctree::
:caption: Reference
Expand Down
484 changes: 484 additions & 0 deletions docs/intro/overrides.rst

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
install_requires=[
'attrs >= 21.3.0',
'parsel',
'url-matcher',
'multidict',
'w3lib >= 1.22.0',
],
Expand Down
39 changes: 39 additions & 0 deletions tests/po_lib/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""
This package is just for overrides testing purposes.
"""
from typing import Dict, Any, Callable

from url_matcher import Patterns

from .. import po_lib_sub # NOTE: this module contains a PO with @handle_urls
from web_poet import handle_urls, PageObjectRegistry


class POBase:
    """Base class for the Page Objects used in the overrides tests.

    Each subclass records, as class attributes, the values the tests compare
    against the rule registered for it.
    """

    # Compared against the rule's ``instead_of``.
    expected_overrides: Callable
    # Compared against the rule's ``for_patterns``.
    expected_patterns: Patterns
    # Compared against the rule's ``meta``.
    expected_meta: Dict[str, Any]


class POTopLevelOverriden1:
    """Empty placeholder passed as ``overrides=`` for ``POTopLevel1``."""


class POTopLevelOverriden2:
    """Empty placeholder passed as ``overrides=`` for ``POTopLevel2``."""


# The first (topmost) annotation is ignored: only a single annotation per
# registry is allowed for a given Page Object.
@handle_urls("example.com", overrides=POTopLevelOverriden1)
@handle_urls(
    "example.com",
    overrides=POTopLevelOverriden1,
    exclude="/*.jpg|",
    priority=300,
)
class POTopLevel1(POBase):
    """Page Object annotated twice; expectations mirror the lower annotation."""

    expected_overrides = POTopLevelOverriden1
    expected_patterns = Patterns(["example.com"], ["/*.jpg|"], priority=300)
    expected_meta = {}  # type: ignore


@handle_urls(
    "example.com",
    overrides=POTopLevelOverriden2,
)
class POTopLevel2(POBase):
    """Page Object with a single include pattern and no extra arguments."""

    expected_overrides = POTopLevelOverriden2
    expected_patterns = Patterns(["example.com"])
    expected_meta = {}  # type: ignore
16 changes: 16 additions & 0 deletions tests/po_lib/a_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from url_matcher import Patterns

from tests.po_lib import POBase
from web_poet import handle_urls


class POModuleOverriden:
    """Empty placeholder passed as ``overrides=`` for ``POModule``."""


@handle_urls(
    "example.com",
    overrides=POModuleOverriden,
    extra_arg="foo",
)
class POModule(POBase):
    """Page Object declared with an extra keyword, expected to land in ``meta``."""

    expected_overrides = POModuleOverriden
    expected_patterns = Patterns(["example.com"])
    expected_meta = {"extra_arg": "foo"}  # type: ignore

Empty file added tests/po_lib/an_empty_module.py
Empty file.
Empty file.
15 changes: 15 additions & 0 deletions tests/po_lib/nested_package/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from url_matcher import Patterns

from tests.po_lib import POBase
from web_poet import handle_urls


class PONestedPkgOverriden:
    """Empty placeholder passed as ``overrides=`` for ``PONestedPkg``."""


@handle_urls(
    include=["example.com", "example.org"],
    exclude=["/*.jpg|"],
    overrides=PONestedPkgOverriden,
)
class PONestedPkg(POBase):
    """Page Object in a nested package, declared with include/exclude lists."""

    expected_overrides = PONestedPkgOverriden
    expected_patterns = Patterns(["example.com", "example.org"], ["/*.jpg|"])
    expected_meta = {}  # type: ignore
15 changes: 15 additions & 0 deletions tests/po_lib/nested_package/a_nested_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from url_matcher import Patterns

from tests.po_lib import POBase
from web_poet import handle_urls


class PONestedModuleOverriden:
    """Empty placeholder passed as ``overrides=`` for ``PONestedModule``."""


@handle_urls(
    include=["example.com", "example.org"],
    exclude=["/*.jpg|"],
    overrides=PONestedModuleOverriden,
)
class PONestedModule(POBase):
    """Page Object in a module of a nested package, with include/exclude lists."""

    expected_overrides = PONestedModuleOverriden
    expected_patterns = Patterns(include=["example.com", "example.org"], exclude=["/*.jpg|"])
    expected_meta = {}  # type: ignore
25 changes: 25 additions & 0 deletions tests/po_lib_sub/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""This package is being used by tests/po_lib to validate some behaviors on
external dependencies.
"""
from typing import Dict, Any, Callable

from url_matcher import Patterns

from web_poet import handle_urls


class POBase:
    """Base class for the Page Objects declared in this package.

    Subclasses record, as class attributes, the values the tests compare
    against the rule registered for them.
    """

    # Compared against the rule's ``instead_of``.
    expected_overrides: Callable
    # Compared against the rule's ``for_patterns``.
    expected_patterns: Patterns
    # Compared against the rule's ``meta``.
    expected_meta: Dict[str, Any]


class POLibSubOverriden:
    """Empty placeholder passed as ``overrides=`` for ``POLibSub``."""


@handle_urls(
    "sub_example.com",
    overrides=POLibSubOverriden,
)
class POLibSub(POBase):
    """Page Object living in a package that ``tests/po_lib`` imports."""

    expected_overrides = POLibSubOverriden
    expected_patterns = Patterns(["sub_example.com"])
    expected_meta = {}  # type: ignore
112 changes: 112 additions & 0 deletions tests/test_overrides.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import pytest
from url_matcher import Patterns

from tests.po_lib_sub import POLibSub
from tests.po_lib import (
POTopLevel1,
POTopLevel2,
POTopLevelOverriden2,
)
from tests.po_lib.a_module import POModule, POModuleOverriden
from tests.po_lib.nested_package import PONestedPkg
from tests.po_lib.nested_package.a_nested_module import PONestedModule
from web_poet import (
default_registry,
consume_modules,
OverrideRule,
PageObjectRegistry,
)


POS = {POTopLevel1, POTopLevel2, POModule, PONestedPkg, PONestedModule}


def test_override_rule_uniqueness():
    """Two ``OverrideRule`` objects that differ only in ``meta`` must hash equal."""
    # Everything except ``meta`` is shared between the two rules.
    shared = dict(
        for_patterns=Patterns(include=["example.com"], exclude=["example.com/blog"]),
        use=POTopLevel1,
        instead_of=POTopLevelOverriden2,
    )

    first = OverrideRule(meta={"key_1": 1}, **shared)
    second = OverrideRule(meta={"key_2": 2}, **shared)

    assert hash(first) == hash(second)


def test_list_page_objects_all():
    """Check the rules ``default_registry`` collected from the imported modules.

    Every Page Object annotated with ``@handle_urls`` in a module imported by
    this test module must be present, and each rule must carry the override
    target, patterns and meta declared in its ``expected_*`` attributes.
    """
    rules = default_registry.get_overrides()
    page_objects = {rule.use for rule in rules}

    # Note that the 'tests_extra.po_lib_sub_not_imported.POLibSubNotImported'
    # Page Object is not included here since it was never imported anywhere in
    # our test package. It would only be included if we ran any of the
    # following. (Note that they should run before `get_overrides` is called.)
    #   - from tests_extra import po_lib_sub_not_imported
    #   - import tests_extra.po_lib_sub_not_imported
    #   - web_poet.consume_modules("tests_extra")
    # Merely having `import tests_extra` won't work since the subpackages and
    # modules need to be traversed and imported as well.
    assert all("po_lib_sub_not_imported" not in po.__module__ for po in page_objects)

    # Ensure that ALL Override Rules are returned as long as the given
    # registry's @handle_urls annotation was used.
    assert page_objects == POS.union({POLibSub})
    for rule in rules:
        # The trailing ``rule.use`` names the offending Page Object on failure.
        assert rule.instead_of == rule.use.expected_overrides, rule.use
        assert rule.for_patterns == rule.use.expected_patterns, rule.use
        assert rule.meta == rule.use.expected_meta, rule.use


def test_consume_module_not_existing():
    """``consume_modules`` must raise ``ImportError`` for an unknown module name."""
    missing_module = "this_does_not_exist"
    with pytest.raises(ImportError):
        consume_modules(missing_module)


def test_list_page_objects_all_consume():
    """A test similar to ``test_list_page_objects_all`` but calls
    ``consume_modules()`` to properly load the @handle_urls annotations from
    other modules/packages.
    """
    # NOTE: after this call the ``tests_extra`` rules show up in
    # ``default_registry``, so tests that assert their absence must run
    # before this one.
    consume_modules("tests_extra")
    rules = default_registry.get_overrides()
    page_objects = {rule.use for rule in rules}
    assert any("po_lib_sub_not_imported" in po.__module__ for po in page_objects)


def test_registry_search_overrides():
    """``search_overrides`` filters the registered rules by attribute value."""
    # Lookup by the Page Object that does the overriding.
    by_use = default_registry.search_overrides(use=POTopLevel2)
    assert len(by_use) == 1
    assert by_use[0].use == POTopLevel2

    # Lookup by the class being overridden.
    by_instead_of = default_registry.search_overrides(instead_of=POTopLevelOverriden2)
    assert len(by_instead_of) == 1
    assert by_instead_of[0].instead_of == POTopLevelOverriden2

    # No rule uses POModuleOverriden as its ``use`` class.
    no_match = default_registry.search_overrides(use=POModuleOverriden)
    assert len(no_match) == 0


def test_from_override_rules():
    """A registry built from explicit rules returns exactly those rules and
    differs from the contents of ``default_registry``.
    """
    sample_rule = OverrideRule(
        for_patterns=Patterns(include=["sample.com"]),
        use=POTopLevel1,
        instead_of=POTopLevelOverriden2,
    )
    rules = [sample_rule]

    custom_registry = PageObjectRegistry.from_override_rules(rules)

    assert custom_registry.get_overrides() == rules
    assert default_registry.get_overrides() != rules
5 changes: 5 additions & 0 deletions tests_extra/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
This test package was created separately to see the behavior of retrieving the
Override rules declared on a registry where @handle_urls is defined on another
package.
"""
28 changes: 28 additions & 0 deletions tests_extra/po_lib_sub_not_imported/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""
This package is quite similar to tests/po_lib_sub in terms of code contents.

What we're ultimately trying to test here is to see if the `default_registry`
captures the rules annotated in this module if it was not imported.
"""
from typing import Dict, Any, Callable

from url_matcher import Patterns

from web_poet import handle_urls


class POBase:
    """Base class for the Page Object declared in this package.

    Subclasses record, as class attributes, the values a registered rule is
    expected to carry.
    """

    # Class passed as ``overrides=`` in the annotation.
    expected_overrides: Callable
    # Patterns matching the annotation's URL arguments.
    expected_patterns: Patterns
    # Any extra keyword arguments given to the annotation.
    expected_meta: Dict[str, Any]


class POLibSubOverridenNotImported:
    """Empty placeholder passed as ``overrides=`` for ``POLibSubNotImported``."""


@handle_urls(
    "sub_example_not_imported.com",
    overrides=POLibSubOverridenNotImported,
)
class POLibSubNotImported(POBase):
    """Page Object whose rule is only registered once this package is imported."""

    expected_overrides = POLibSubOverridenNotImported
    expected_patterns = Patterns(["sub_example_not_imported.com"])
    expected_meta = {}  # type: ignore
4 changes: 4 additions & 0 deletions web_poet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
from .pages import WebPage, ItemPage, ItemWebPage, Injectable
from .page_inputs import HttpResponse, HttpResponseBody, HttpResponseHeaders
from .overrides import PageObjectRegistry, consume_modules, OverrideRule

# Package-level registry instance, importable as ``web_poet.default_registry``.
default_registry = PageObjectRegistry()
# Re-export the registry's decorator so that ``from web_poet import handle_urls``
# annotates Page Objects against ``default_registry``.
handle_urls = default_registry.handle_urls
Loading