From 40205f67df5f59df4b88ce47bbbe98f1eff36230 Mon Sep 17 00:00:00 2001
From: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
Date: Fri, 4 Feb 2022 15:44:54 -0800
Subject: [PATCH] Add types to the public API (#244)

Resolves #189
---
 .travis.yml               |  1 +
 MANIFEST.in               |  1 +
 tldextract/__init__.py    |  3 +-
 tldextract/cache.py       |  4 +--
 tldextract/py.typed       |  0
 tldextract/suffix_list.py |  2 +-
 tldextract/tldextract.py  | 58 +++++++++++++++++++++------------------
 tox.ini                   |  9 +++++-
 8 files changed, 47 insertions(+), 31 deletions(-)
 create mode 100644 tldextract/py.typed

diff --git a/.travis.yml b/.travis.yml
index 4527f478..e26ce3cb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,6 +14,7 @@ matrix:
       env: TOXENV=pypy3
     - env: TOXENV=codestyle
     - env: TOXENV=lint
+    - env: TOXENV=typecheck
 python: 3.9
 install: pip install tox
 script: tox
diff --git a/MANIFEST.in b/MANIFEST.in
index e29f070e..f0350d53 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,4 @@
 include tldextract/.tld_set_snapshot
+include tldextract/py.typed
 include LICENSE
 recursive-include tests *.py *.dat
diff --git a/tldextract/__init__.py b/tldextract/__init__.py
index 4c6fc6c1..d408c14c 100644
--- a/tldextract/__init__.py
+++ b/tldextract/__init__.py
@@ -2,4 +2,5 @@
 
 from .tldextract import extract, TLDExtract
 
-from ._version import version as __version__
+from . import _version
+__version__: str = _version.version
diff --git a/tldextract/cache.py b/tldextract/cache.py
index 371ae07e..8a664888 100644
--- a/tldextract/cache.py
+++ b/tldextract/cache.py
@@ -15,7 +15,7 @@
 _DID_LOG_UNABLE_TO_CACHE = False
 
 
-def get_pkg_unique_identifier():
+def get_pkg_unique_identifier() -> str:
     """
     Generate an identifier unique to the python version, tldextract version, and python instance
 
@@ -46,7 +46,7 @@ def get_pkg_unique_identifier():
     return pkg_identifier
 
 
-def get_cache_dir():
+def get_cache_dir() -> str:
     """
     Get a cache dir that we have permission to write to
 
diff --git a/tldextract/py.typed b/tldextract/py.typed
new file mode 100644
index 00000000..e69de29b
diff --git a/tldextract/suffix_list.py b/tldextract/suffix_list.py
index a4371c83..fba5218b 100644
--- a/tldextract/suffix_list.py
+++ b/tldextract/suffix_list.py
@@ -5,7 +5,7 @@
 import re
 
 import requests
-from requests_file import FileAdapter
+from requests_file import FileAdapter  # type: ignore[import]
 
 LOG = logging.getLogger("tldextract")
 
diff --git a/tldextract/tldextract.py b/tldextract/tldextract.py
index 536f056d..063b78f7 100644
--- a/tldextract/tldextract.py
+++ b/tldextract/tldextract.py
@@ -49,10 +49,10 @@
     '127.0.0.1'
 """
 
-import collections
 import logging
 import os
 from functools import wraps
+from typing import List, NamedTuple, Optional, Sequence, Union
 
 import idna
 
@@ -71,14 +71,15 @@
 )
 
 
-class ExtractResult(collections.namedtuple("ExtractResult", "subdomain domain suffix")):
+class ExtractResult(NamedTuple):
     """namedtuple of a URL's subdomain, domain, and suffix."""
 
-    # Necessary for __dict__ member to get populated in Python 3+
-    __slots__ = ()
+    subdomain: str
+    domain: str
+    suffix: str
 
     @property
-    def registered_domain(self):
+    def registered_domain(self) -> str:
         """
         Joins the domain and suffix fields with a dot, if they're both set.
 
@@ -92,7 +93,7 @@ def registered_domain(self):
         return ""
 
     @property
-    def fqdn(self):
+    def fqdn(self) -> str:
         """
         Returns a Fully Qualified Domain Name, if there is a proper domain/suffix.
 
@@ -102,12 +103,13 @@ def fqdn(self):
         ''
         """
         if self.domain and self.suffix:
-            # self is the namedtuple (subdomain domain suffix)
+            # Disable bogus lint error (https://github.com/PyCQA/pylint/issues/2568)
+            # pylint: disable-next=not-an-iterable
             return ".".join(i for i in self if i)
         return ""
 
     @property
-    def ipv4(self):
+    def ipv4(self) -> str:
         """
         Returns the ipv4 if that is what the presented domain/url is
 
@@ -130,13 +132,13 @@ class TLDExtract:
     # TODO: Agreed with Pylint: too-many-arguments
     def __init__(  # pylint: disable=too-many-arguments
         self,
-        cache_dir=get_cache_dir(),
-        suffix_list_urls=PUBLIC_SUFFIX_LIST_URLS,
-        fallback_to_snapshot=True,
-        include_psl_private_domains=False,
-        extra_suffixes=(),
-        cache_fetch_timeout=CACHE_TIMEOUT,
-    ):
+        cache_dir: str = get_cache_dir(),
+        suffix_list_urls: Sequence[str] = PUBLIC_SUFFIX_LIST_URLS,
+        fallback_to_snapshot: bool = True,
+        include_psl_private_domains: bool = False,
+        extra_suffixes: Sequence[str] = (),
+        cache_fetch_timeout: Union[str, float, None] = CACHE_TIMEOUT,
+    ) -> None:
         """
         Constructs a callable for extracting subdomain, domain, and suffix
         components from a URL.
@@ -193,14 +195,18 @@ def __init__(  # pylint: disable=too-many-arguments
 
         self.include_psl_private_domains = include_psl_private_domains
         self.extra_suffixes = extra_suffixes
-        self._extractor = None
+        self._extractor: Optional[_PublicSuffixListTLDExtractor] = None
 
-        self.cache_fetch_timeout = cache_fetch_timeout
+        self.cache_fetch_timeout = (
+            float(cache_fetch_timeout)
+            if isinstance(cache_fetch_timeout, str)
+            else cache_fetch_timeout
+        )
         self._cache = DiskCache(cache_dir)
-        if isinstance(self.cache_fetch_timeout, str):
-            self.cache_fetch_timeout = float(self.cache_fetch_timeout)
 
-    def __call__(self, url, include_psl_private_domains=None):
+    def __call__(
+        self, url: str, include_psl_private_domains: Optional[bool] = None
+    ) -> ExtractResult:
         """
         Takes a string URL and splits it into its subdomain, domain, and
         suffix (effective TLD, gTLD, ccTLD, etc.) component.
@@ -238,7 +244,7 @@ def __call__(self, url, include_psl_private_domains=None):
         domain = labels[suffix_index - 1] if suffix_index else ""
         return ExtractResult(subdomain, domain, suffix)
 
-    def update(self, fetch_now=False):
+    def update(self, fetch_now: bool = False) -> None:
         """Force fetch the latest suffix list definitions."""
         self._extractor = None
         self._cache.clear()
@@ -246,7 +252,7 @@ def update(self, fetch_now=False):
             self._get_tld_extractor()
 
     @property
-    def tlds(self):
+    def tlds(self) -> List[str]:
         """
         Returns the list of tld's used by default
 
@@ -254,7 +260,7 @@ def tlds(self):
         """
         return list(self._get_tld_extractor().tlds())
 
-    def _get_tld_extractor(self):
+    def _get_tld_extractor(self) -> "_PublicSuffixListTLDExtractor":
         """Get or compute this object's TLDExtractor. Looks up the TLDExtractor
         in roughly the following order, based on the settings passed to
         __init__:
@@ -290,9 +296,9 @@ def _get_tld_extractor(self):
 
 
 @wraps(TLD_EXTRACTOR.__call__)
-def extract(
-    url, include_psl_private_domains=False
-):  # pylint: disable=missing-function-docstring
+def extract(  # pylint: disable=missing-function-docstring
+    url: str, include_psl_private_domains: Optional[bool] = False
+) -> ExtractResult:
     return TLD_EXTRACTOR(url, include_psl_private_domains=include_psl_private_domains)
 
 
diff --git a/tox.ini b/tox.ini
index fc979080..ffa6a764 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py{36,37,38,39,py3},codestyle,lint
+envlist = py{36,37,38,39,py3},codestyle,lint,typecheck
 
 [testenv]
 deps =
@@ -24,6 +24,13 @@ deps =
     responses
 commands = pytest --pylint -m pylint {posargs}
 
+[testenv:typecheck]
+deps =
+    mypy
+    types-requests
+    types-filelock
+commands = mypy tldextract --show-error-codes
+
 [pycodestyle]
 # E203 - whitespace before; disagrees with PEP8 https://github.com/psf/black/issues/354#issuecomment-397684838
 # E501 - line too long