Skip to content

Commit

Permalink
add caching function for TLD info extraction (#barseghyanartur/tld/is…
Browse files Browse the repository at this point in the history
  • Loading branch information
adbar committed Nov 8, 2021
1 parent 64e817b commit b290699
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions courlan/urlutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,24 @@

import re

from functools import lru_cache
from urllib.parse import urlparse

from tld import get_fld, get_tld


@lru_cache(maxsize=1024)
def get_tldinfo(url):
    '''Look up the top-level domain info of a URL, memoizing the result.

    The work is delegated to ``get_tld`` with ``as_object=True`` and
    ``fail_silently=True``; whatever it returns is handed back unchanged.
    Results are kept in an LRU cache holding up to 1024 entries keyed on
    ``url``, so ``url`` has to be hashable.
    '''
    # fail_silently=True: callers are expected to cope with a failed
    # lookup themselves (presumably signalled by None -- confirm
    # against the tld documentation).
    tld_result = get_tld(url, as_object=True, fail_silently=True)
    return tld_result


def extract_domain(url, blacklist=None):
'''Extract domain name information using top-level domain info'''
if blacklist is None:
blacklist = set()
# new code: Python >= 3.6 with tld module
tldinfo = get_tld(url, as_object=True, fail_silently=True)
tldinfo = get_tldinfo(url)
# invalid input OR domain TLD blacklist
if tldinfo is None or tldinfo.domain in blacklist:
return None
Expand Down Expand Up @@ -71,8 +78,8 @@ def is_external(url, reference, ignore_suffix=True):
# new code: Python >= 3.6 with tld module
if ignore_suffix is True:
try:
ref_domain, domain = get_tld(reference, as_object=True, fail_silently=True).domain, \
get_tld(url, as_object=True, fail_silently=True).domain
ref_domain, domain = get_tldinfo(reference).domain, \
get_tldinfo(url).domain
# invalid input
except AttributeError:
return True
Expand Down

0 comments on commit b290699

Please sign in to comment.