Skip to content

Commit

Permalink
Merge pull request #2433 from SnoopJ/chore/gh2230-tidy-up
Browse files Browse the repository at this point in the history
url: fix premature loop termination, docstring cleanup & slight refactoring
  • Loading branch information
dgw authored Mar 28, 2023
2 parents 2a09b9b + 2051f95 commit 9497c45
Showing 1 changed file with 51 additions and 22 deletions.
73 changes: 51 additions & 22 deletions sopel/modules/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ipaddress import ip_address
import logging
import re
from typing import Generator, List, Optional, Tuple, TYPE_CHECKING
from typing import Generator, List, NamedTuple, Optional, TYPE_CHECKING
from urllib.parse import urlparse

import dns.resolver
Expand Down Expand Up @@ -289,10 +289,10 @@ def title_command(bot: SopelWrapper, trigger: Trigger):
# needs to be a list so len() can be checked later
urls = list(web.search_urls(trigger))

for url, title, domain, tinyurl, dispatched in process_urls(
for url, title, domain, tinyurl, ignored in process_urls(
bot, trigger, urls, requested=True
):
if dispatched:
if ignored:
result_count += 1
continue
message = "%s | %s" % (title, domain)
Expand All @@ -317,8 +317,13 @@ def title_command(bot: SopelWrapper, trigger: Trigger):
def title_auto(bot: SopelWrapper, trigger: Trigger):
"""
Automatically show titles for URLs. For shortened URLs/redirects, find
where the URL redirects to and show the title for that (or call a function
from another plugin to give more information).
where the URL redirects to and show the title for that.
.. note::
URLs that match (before redirection) any other registered callbacks
will *not* have their titles shown.
"""
# Enabled or disabled by feature flag
if not bot.settings.url.enable_auto_title:
Expand All @@ -342,8 +347,8 @@ def title_auto(bot: SopelWrapper, trigger: Trigger):
continue
urls.append(url)

for url, title, domain, tinyurl, dispatched in process_urls(bot, trigger, urls):
if not dispatched:
for url, title, domain, tinyurl, ignored in process_urls(bot, trigger, urls):
if not ignored:
message = '%s | %s' % (title, domain)
if tinyurl:
message += ' ( %s )' % tinyurl
Expand All @@ -353,16 +358,30 @@ def title_auto(bot: SopelWrapper, trigger: Trigger):
bot.memory["last_seen_url"][trigger.sender] = url


class URLInfo(NamedTuple):
"""Helper class for information about a URL handled by this plugin."""

url: str

title: Optional[str]
"""The title associated with ``url``, if appropriate."""

hostname: Optional[str]
"""The hostname associated with ``url``, if appropriate."""

tinyurl: Optional[str]
"""A shortened form of ``url``, if appropriate."""

ignored: bool
"""Whether or not this URL matches any registered callbacks or is explicitly excluded."""


def process_urls(
bot: SopelWrapper,
trigger: Trigger,
urls: List[str],
requested: bool = False,
) -> Generator[
Tuple[str, Optional[str], Optional[str], Optional[str], bool],
None,
None,
]:
) -> Generator[URLInfo, None, None]:
"""
For each URL in the list, ensure it should be titled, and do so.
Expand All @@ -371,15 +390,25 @@ def process_urls(
:param urls: The URLs detected in the triggering message
:param requested: Whether the title was explicitly requested (vs automatic)
See if it's handled by another plugin. If not, find where it redirects to,
if anywhere. If that redirected URL should be handled by another plugin,
dispatch the callback for it. Return a list of
(url, title, hostname, tinyurl, dispatched) tuples for each URL.
Yields a tuple ``(url, title, hostname, tinyurl, ignored)`` for each URL.
.. note:
If a URL in ``urls`` has any registered callbacks, this function will NOT
retrieve the title, and considers the URL as dispatched to those callbacks.
In this case, only the ``url`` and ``ignored=True`` will be set; all
other values will be ``None``.
.. note:
If a callback was dispatched, only the url and dispatched=True will be set.
For titles explicitly requested by the user, ``exclusion_char`` and
exclusions from the ``.urlban``/``.urlpban`` commands are skipped.
.. versionchanged:: 8.0
This function **does not** notify callbacks registered for URLs
redirected to from URLs passed to this function. See #2432, #2230.
For titles explicitly requested by the user, exclusion_char and excludes
are skipped.
"""
shorten_url_length = bot.config.url.shorten_url_length
for url in urls:
Expand All @@ -389,10 +418,10 @@ def process_urls(

parsed_url = urlparse(url)

# Check the URL does not match an existing URL callback
if check_callbacks(bot, url, use_excludes=not requested):
yield (url, None, None, None, True)
return
# URL matches a callback OR is excluded, ignore
yield URLInfo(url, None, None, None, True)
continue

# Prevent private addresses from being queried if enable_private_resolution is False
# FIXME: This does nothing when an attacker knows how to host a 302
Expand Down

0 comments on commit 9497c45

Please sign in to comment.