From ebcea683d52d844dc6f3c6053de01d3d23779e52 Mon Sep 17 00:00:00 2001 From: Jovial Joe Jayarson Date: Wed, 3 Apr 2024 18:17:11 +0530 Subject: [PATCH] feat: adds `consider_tld` parameter to `domain`, `hostname` and `url` modules > co-authored by: @aviiciii --- src/validators/_tld.txt | 1449 ++++++++++++++++++++++++++++++++++++ src/validators/domain.py | 17 +- src/validators/hostname.py | 7 +- src/validators/url.py | 7 + tests/test_domain.py | 47 ++ 5 files changed, 1524 insertions(+), 3 deletions(-) create mode 100644 src/validators/_tld.txt diff --git a/src/validators/_tld.txt b/src/validators/_tld.txt new file mode 100644 index 00000000..b26d13ad --- /dev/null +++ b/src/validators/_tld.txt @@ -0,0 +1,1449 @@ +# Version 2024040300, Last Updated Wed Apr 3 07:07:01 2024 UTC +AAA +AARP +ABB +ABBOTT +ABBVIE +ABC +ABLE +ABOGADO +ABUDHABI +AC +ACADEMY +ACCENTURE +ACCOUNTANT +ACCOUNTANTS +ACO +ACTOR +AD +ADS +ADULT +AE +AEG +AERO +AETNA +AF +AFL +AFRICA +AG +AGAKHAN +AGENCY +AI +AIG +AIRBUS +AIRFORCE +AIRTEL +AKDN +AL +ALIBABA +ALIPAY +ALLFINANZ +ALLSTATE +ALLY +ALSACE +ALSTOM +AM +AMAZON +AMERICANEXPRESS +AMERICANFAMILY +AMEX +AMFAM +AMICA +AMSTERDAM +ANALYTICS +ANDROID +ANQUAN +ANZ +AO +AOL +APARTMENTS +APP +APPLE +AQ +AQUARELLE +AR +ARAB +ARAMCO +ARCHI +ARMY +ARPA +ART +ARTE +AS +ASDA +ASIA +ASSOCIATES +AT +ATHLETA +ATTORNEY +AU +AUCTION +AUDI +AUDIBLE +AUDIO +AUSPOST +AUTHOR +AUTO +AUTOS +AW +AWS +AX +AXA +AZ +AZURE +BA +BABY +BAIDU +BANAMEX +BAND +BANK +BAR +BARCELONA +BARCLAYCARD +BARCLAYS +BAREFOOT +BARGAINS +BASEBALL +BASKETBALL +BAUHAUS +BAYERN +BB +BBC +BBT +BBVA +BCG +BCN +BD +BE +BEATS +BEAUTY +BEER +BENTLEY +BERLIN +BEST +BESTBUY +BET +BF +BG +BH +BHARTI +BI +BIBLE +BID +BIKE +BING +BINGO +BIO +BIZ +BJ +BLACK +BLACKFRIDAY +BLOCKBUSTER +BLOG +BLOOMBERG +BLUE +BM +BMS +BMW +BN +BNPPARIBAS +BO +BOATS +BOEHRINGER +BOFA +BOM +BOND +BOO +BOOK +BOOKING +BOSCH +BOSTIK +BOSTON +BOT +BOUTIQUE +BOX +BR +BRADESCO +BRIDGESTONE +BROADWAY +BROKER +BROTHER +BRUSSELS +BS +BT +BUILD +BUILDERS +BUSINESS +BUY +BUZZ +BV +BW +BY +BZ +BZH +CA +CAB +CAFE +CAL +CALL +CALVINKLEIN +CAM +CAMERA +CAMP +CANON +CAPETOWN +CAPITAL +CAPITALONE +CAR +CARAVAN +CARDS +CARE +CAREER +CAREERS +CARS +CASA +CASE +CASH +CASINO +CAT +CATERING +CATHOLIC +CBA +CBN +CBRE +CC +CD +CENTER +CEO +CERN +CF +CFA +CFD +CG +CH +CHANEL +CHANNEL +CHARITY +CHASE +CHAT +CHEAP +CHINTAI +CHRISTMAS +CHROME +CHURCH +CI +CIPRIANI +CIRCLE +CISCO +CITADEL +CITI +CITIC +CITY +CK +CL +CLAIMS +CLEANING +CLICK +CLINIC +CLINIQUE +CLOTHING +CLOUD +CLUB +CLUBMED +CM +CN +CO +COACH +CODES +COFFEE +COLLEGE +COLOGNE +COM +COMMBANK +COMMUNITY +COMPANY +COMPARE +COMPUTER +COMSEC +CONDOS +CONSTRUCTION +CONSULTING +CONTACT +CONTRACTORS +COOKING +COOL +COOP +CORSICA +COUNTRY +COUPON +COUPONS +COURSES +CPA +CR +CREDIT +CREDITCARD +CREDITUNION +CRICKET +CROWN +CRS +CRUISE +CRUISES +CU +CUISINELLA +CV +CW +CX +CY +CYMRU +CYOU +CZ +DABUR +DAD +DANCE +DATA +DATE +DATING +DATSUN +DAY +DCLK +DDS +DE +DEAL +DEALER +DEALS +DEGREE +DELIVERY +DELL +DELOITTE +DELTA +DEMOCRAT +DENTAL +DENTIST +DESI +DESIGN +DEV +DHL +DIAMONDS +DIET +DIGITAL +DIRECT +DIRECTORY +DISCOUNT +DISCOVER +DISH +DIY +DJ +DK +DM +DNP +DO +DOCS +DOCTOR +DOG +DOMAINS +DOT +DOWNLOAD +DRIVE +DTV +DUBAI +DUNLOP +DUPONT +DURBAN +DVAG +DVR +DZ +EARTH +EAT +EC +ECO +EDEKA +EDU +EDUCATION +EE +EG +EMAIL +EMERCK +ENERGY +ENGINEER +ENGINEERING +ENTERPRISES +EPSON +EQUIPMENT +ER +ERICSSON +ERNI +ES +ESQ +ESTATE +ET +EU +EUROVISION +EUS +EVENTS +EXCHANGE +EXPERT +EXPOSED +EXPRESS +EXTRASPACE +FAGE +FAIL +FAIRWINDS +FAITH +FAMILY +FAN +FANS +FARM +FARMERS +FASHION +FAST +FEDEX +FEEDBACK +FERRARI +FERRERO +FI +FIDELITY +FIDO +FILM +FINAL +FINANCE +FINANCIAL +FIRE +FIRESTONE +FIRMDALE +FISH +FISHING +FIT +FITNESS +FJ +FK +FLICKR +FLIGHTS +FLIR +FLORIST +FLOWERS +FLY +FM +FO +FOO +FOOD +FOOTBALL +FORD +FOREX +FORSALE +FORUM +FOUNDATION +FOX +FR +FREE +FRESENIUS +FRL +FROGANS +FRONTIER +FTR +FUJITSU +FUN +FUND +FURNITURE +FUTBOL +FYI +GA +GAL +GALLERY +GALLO +GALLUP +GAME +GAMES +GAP +GARDEN +GAY +GB +GBIZ +GD +GDN +GE +GEA +GENT +GENTING +GEORGE +GF +GG +GGEE +GH +GI +GIFT +GIFTS +GIVES +GIVING +GL +GLASS +GLE +GLOBAL +GLOBO +GM +GMAIL +GMBH +GMO +GMX +GN +GODADDY +GOLD +GOLDPOINT +GOLF +GOO +GOODYEAR +GOOG +GOOGLE +GOP +GOT +GOV +GP +GQ +GR +GRAINGER +GRAPHICS +GRATIS +GREEN +GRIPE +GROCERY +GROUP +GS +GT +GU +GUCCI +GUGE +GUIDE +GUITARS +GURU +GW +GY +HAIR +HAMBURG +HANGOUT +HAUS +HBO +HDFC +HDFCBANK +HEALTH +HEALTHCARE +HELP +HELSINKI +HERE +HERMES +HIPHOP +HISAMITSU +HITACHI +HIV +HK +HKT +HM +HN +HOCKEY +HOLDINGS +HOLIDAY +HOMEDEPOT +HOMEGOODS +HOMES +HOMESENSE +HONDA +HORSE +HOSPITAL +HOST +HOSTING +HOT +HOTELS +HOTMAIL +HOUSE +HOW +HR +HSBC +HT +HU +HUGHES +HYATT +HYUNDAI +IBM +ICBC +ICE +ICU +ID +IE +IEEE +IFM +IKANO +IL +IM +IMAMAT +IMDB +IMMO +IMMOBILIEN +IN +INC +INDUSTRIES +INFINITI +INFO +ING +INK +INSTITUTE +INSURANCE +INSURE +INT +INTERNATIONAL +INTUIT +INVESTMENTS +IO +IPIRANGA +IQ +IR +IRISH +IS +ISMAILI +IST +ISTANBUL +IT +ITAU +ITV +JAGUAR +JAVA +JCB +JE +JEEP +JETZT +JEWELRY +JIO +JLL +JM +JMP +JNJ +JO +JOBS +JOBURG +JOT +JOY +JP +JPMORGAN +JPRS +JUEGOS +JUNIPER +KAUFEN +KDDI +KE +KERRYHOTELS +KERRYLOGISTICS +KERRYPROPERTIES +KFH +KG +KH +KI +KIA +KIDS +KIM +KINDLE +KITCHEN +KIWI +KM +KN +KOELN +KOMATSU +KOSHER +KP +KPMG +KPN +KR +KRD +KRED +KUOKGROUP +KW +KY +KYOTO +KZ +LA +LACAIXA +LAMBORGHINI +LAMER +LANCASTER +LAND +LANDROVER +LANXESS +LASALLE +LAT +LATINO +LATROBE +LAW +LAWYER +LB +LC +LDS +LEASE +LECLERC +LEFRAK +LEGAL +LEGO +LEXUS +LGBT +LI +LIDL +LIFE +LIFEINSURANCE +LIFESTYLE +LIGHTING +LIKE +LILLY +LIMITED +LIMO +LINCOLN +LINK +LIPSY +LIVE +LIVING +LK +LLC +LLP +LOAN +LOANS +LOCKER +LOCUS +LOL +LONDON +LOTTE +LOTTO +LOVE +LPL +LPLFINANCIAL +LR +LS +LT +LTD +LTDA +LU +LUNDBECK +LUXE +LUXURY +LV +LY +MA +MADRID +MAIF +MAISON +MAKEUP +MAN +MANAGEMENT +MANGO +MAP +MARKET +MARKETING +MARKETS +MARRIOTT +MARSHALLS +MATTEL +MBA +MC +MCKINSEY +MD +ME +MED +MEDIA +MEET +MELBOURNE +MEME +MEMORIAL +MEN +MENU +MERCKMSD +MG +MH +MIAMI +MICROSOFT +MIL +MINI +MINT +MIT +MITSUBISHI +MK +ML +MLB +MLS +MM +MMA +MN +MO +MOBI +MOBILE +MODA +MOE +MOI +MOM +MONASH +MONEY +MONSTER +MORMON +MORTGAGE +MOSCOW +MOTO +MOTORCYCLES +MOV +MOVIE +MP +MQ +MR +MS +MSD +MT +MTN +MTR +MU +MUSEUM +MUSIC +MV +MW +MX +MY +MZ +NA +NAB +NAGOYA +NAME +NATURA +NAVY +NBA +NC +NE +NEC +NET +NETBANK +NETFLIX +NETWORK +NEUSTAR +NEW +NEWS +NEXT +NEXTDIRECT +NEXUS +NF +NFL +NG +NGO +NHK +NI +NICO +NIKE +NIKON +NINJA +NISSAN +NISSAY +NL +NO +NOKIA +NORTON +NOW +NOWRUZ +NOWTV +NP +NR +NRA +NRW +NTT +NU +NYC +NZ +OBI +OBSERVER +OFFICE +OKINAWA +OLAYAN +OLAYANGROUP +OLLO +OM +OMEGA +ONE +ONG +ONL +ONLINE +OOO +OPEN +ORACLE +ORANGE +ORG +ORGANIC +ORIGINS +OSAKA +OTSUKA +OTT +OVH +PA +PAGE +PANASONIC +PARIS +PARS +PARTNERS +PARTS +PARTY +PAY +PCCW +PE +PET +PF +PFIZER +PG +PH +PHARMACY +PHD +PHILIPS +PHONE +PHOTO +PHOTOGRAPHY +PHOTOS +PHYSIO +PICS +PICTET +PICTURES +PID +PIN +PING +PINK +PIONEER +PIZZA +PK +PL +PLACE +PLAY +PLAYSTATION +PLUMBING +PLUS +PM +PN +PNC +POHL +POKER +POLITIE +PORN +POST +PR +PRAMERICA +PRAXI +PRESS +PRIME +PRO +PROD +PRODUCTIONS +PROF +PROGRESSIVE +PROMO +PROPERTIES +PROPERTY +PROTECTION +PRU +PRUDENTIAL +PS +PT +PUB +PW +PWC +PY +QA +QPON +QUEBEC +QUEST +RACING +RADIO +RE +READ +REALESTATE +REALTOR +REALTY +RECIPES +RED +REDSTONE +REDUMBRELLA +REHAB +REISE +REISEN +REIT +RELIANCE +REN +RENT +RENTALS +REPAIR +REPORT +REPUBLICAN +REST +RESTAURANT +REVIEW +REVIEWS +REXROTH +RICH +RICHARDLI +RICOH +RIL +RIO +RIP +RO +ROCKS +RODEO +ROGERS +ROOM +RS +RSVP +RU +RUGBY +RUHR +RUN +RW +RWE +RYUKYU +SA +SAARLAND +SAFE +SAFETY +SAKURA +SALE +SALON +SAMSCLUB +SAMSUNG +SANDVIK +SANDVIKCOROMANT +SANOFI +SAP +SARL +SAS +SAVE +SAXO +SB +SBI +SBS +SC +SCB +SCHAEFFLER +SCHMIDT +SCHOLARSHIPS +SCHOOL +SCHULE +SCHWARZ +SCIENCE +SCOT +SD +SE +SEARCH +SEAT +SECURE +SECURITY +SEEK +SELECT +SENER +SERVICES +SEVEN +SEW +SEX +SEXY +SFR +SG +SH +SHANGRILA +SHARP +SHAW +SHELL +SHIA +SHIKSHA +SHOES +SHOP +SHOPPING +SHOUJI +SHOW +SI +SILK +SINA +SINGLES +SITE +SJ +SK +SKI +SKIN +SKY +SKYPE +SL +SLING +SM +SMART +SMILE +SN +SNCF +SO +SOCCER +SOCIAL +SOFTBANK +SOFTWARE +SOHU +SOLAR +SOLUTIONS +SONG +SONY +SOY +SPA +SPACE +SPORT +SPOT +SR +SRL +SS +ST +STADA +STAPLES +STAR +STATEBANK +STATEFARM +STC +STCGROUP +STOCKHOLM +STORAGE +STORE +STREAM +STUDIO +STUDY +STYLE +SU +SUCKS +SUPPLIES +SUPPLY +SUPPORT +SURF +SURGERY +SUZUKI +SV +SWATCH +SWISS +SX +SY +SYDNEY +SYSTEMS +SZ +TAB +TAIPEI +TALK +TAOBAO +TARGET +TATAMOTORS +TATAR +TATTOO +TAX +TAXI +TC +TCI +TD +TDK +TEAM +TECH +TECHNOLOGY +TEL +TEMASEK +TENNIS +TEVA +TF +TG +TH +THD +THEATER +THEATRE +TIAA +TICKETS +TIENDA +TIPS +TIRES +TIROL +TJ +TJMAXX +TJX +TK +TKMAXX +TL +TM +TMALL +TN +TO +TODAY +TOKYO +TOOLS +TOP +TORAY +TOSHIBA +TOTAL +TOURS +TOWN +TOYOTA +TOYS +TR +TRADE +TRADING +TRAINING +TRAVEL +TRAVELERS +TRAVELERSINSURANCE +TRUST +TRV +TT +TUBE +TUI +TUNES +TUSHU +TV +TVS +TW +TZ +UA +UBANK +UBS +UG +UK +UNICOM +UNIVERSITY +UNO +UOL +UPS +US +UY +UZ +VA +VACATIONS +VANA +VANGUARD +VC +VE +VEGAS +VENTURES +VERISIGN +VERSICHERUNG +VET +VG +VI +VIAJES +VIDEO +VIG +VIKING +VILLAS +VIN +VIP +VIRGIN +VISA +VISION +VIVA +VIVO +VLAANDEREN +VN +VODKA +VOLVO +VOTE +VOTING +VOTO +VOYAGE +VU +WALES +WALMART +WALTER +WANG +WANGGOU +WATCH +WATCHES +WEATHER +WEATHERCHANNEL +WEBCAM +WEBER +WEBSITE +WED +WEDDING +WEIBO +WEIR +WF +WHOSWHO +WIEN +WIKI +WILLIAMHILL +WIN +WINDOWS +WINE +WINNERS +WME +WOLTERSKLUWER +WOODSIDE +WORK +WORKS +WORLD +WOW +WS +WTC +WTF +XBOX +XEROX +XIHUAN +XIN +XN--11B4C3D +XN--1CK2E1B +XN--1QQW23A +XN--2SCRJ9C +XN--30RR7Y +XN--3BST00M +XN--3DS443G +XN--3E0B707E +XN--3HCRJ9C +XN--3PXU8K +XN--42C2D9A +XN--45BR5CYL +XN--45BRJ9C +XN--45Q11C +XN--4DBRK0CE +XN--4GBRIM +XN--54B7FTA0CC +XN--55QW42G +XN--55QX5D +XN--5SU34J936BGSG +XN--5TZM5G +XN--6FRZ82G +XN--6QQ986B3XL +XN--80ADXHKS +XN--80AO21A +XN--80AQECDR1A +XN--80ASEHDB +XN--80ASWG +XN--8Y0A063A +XN--90A3AC +XN--90AE +XN--90AIS +XN--9DBQ2A +XN--9ET52U +XN--9KRT00A +XN--B4W605FERD +XN--BCK1B9A5DRE4C +XN--C1AVG +XN--C2BR7G +XN--CCK2B3B +XN--CCKWCXETD +XN--CG4BKI +XN--CLCHC0EA0B2G2A9GCD +XN--CZR694B +XN--CZRS0T +XN--CZRU2D +XN--D1ACJ3B +XN--D1ALF +XN--E1A4C +XN--ECKVDTC9D +XN--EFVY88H +XN--FCT429K +XN--FHBEI +XN--FIQ228C5HS +XN--FIQ64B +XN--FIQS8S +XN--FIQZ9S +XN--FJQ720A +XN--FLW351E +XN--FPCRJ9C3D +XN--FZC2C9E2C +XN--FZYS8D69UVGM +XN--G2XX48C +XN--GCKR3F0F +XN--GECRJ9C +XN--GK3AT1E +XN--H2BREG3EVE +XN--H2BRJ9C +XN--H2BRJ9C8C +XN--HXT814E +XN--I1B6B1A6A2E +XN--IMR513N +XN--IO0A7I +XN--J1AEF +XN--J1AMH +XN--J6W193G +XN--JLQ480N2RG +XN--JVR189M +XN--KCRX77D1X4A +XN--KPRW13D +XN--KPRY57D +XN--KPUT3I +XN--L1ACC +XN--LGBBAT1AD8J +XN--MGB9AWBF +XN--MGBA3A3EJT +XN--MGBA3A4F16A +XN--MGBA7C0BBN0A +XN--MGBAAM7A8H +XN--MGBAB2BD +XN--MGBAH1A3HJKRD +XN--MGBAI9AZGQP6J +XN--MGBAYH7GPA +XN--MGBBH1A +XN--MGBBH1A71E +XN--MGBC0A9AZCG +XN--MGBCA7DZDO +XN--MGBCPQ6GPA1A +XN--MGBERP4A5D4AR +XN--MGBGU82A +XN--MGBI4ECEXP +XN--MGBPL2FH +XN--MGBT3DHD +XN--MGBTX2B +XN--MGBX4CD0AB +XN--MIX891F +XN--MK1BU44C +XN--MXTQ1M +XN--NGBC5AZD +XN--NGBE9E0A +XN--NGBRX +XN--NODE +XN--NQV7F +XN--NQV7FS00EMA +XN--NYQY26A +XN--O3CW4H +XN--OGBPF8FL +XN--OTU796D +XN--P1ACF +XN--P1AI +XN--PGBS0DH +XN--PSSY2U +XN--Q7CE6A +XN--Q9JYB4C +XN--QCKA1PMC +XN--QXA6A +XN--QXAM +XN--RHQV96G +XN--ROVU88B +XN--RVC1E0AM3E +XN--S9BRJ9C +XN--SES554G +XN--T60B56A +XN--TCKWE +XN--TIQ49XQYJ +XN--UNUP4Y +XN--VERMGENSBERATER-CTB +XN--VERMGENSBERATUNG-PWB +XN--VHQUV +XN--VUQ861B +XN--W4R85EL8FHU5DNRA +XN--W4RS40L +XN--WGBH1C +XN--WGBL6A +XN--XHQ521B +XN--XKC2AL3HYE2A +XN--XKC2DL3A5EE0H +XN--Y9A3AQ +XN--YFRO4I67O +XN--YGBI2AMMX +XN--ZFR164B +XXX +XYZ +YACHTS +YAHOO +YAMAXUN +YANDEX +YE +YODOBASHI +YOGA +YOKOHAMA +YOU +YOUTUBE +YT +YUN +ZA +ZAPPOS +ZARA +ZERO +ZIP +ZM +ZONE +ZUERICH +ZW diff --git a/src/validators/domain.py b/src/validators/domain.py index 540234e2..9051462b 100644 --- a/src/validators/domain.py +++ b/src/validators/domain.py @@ -1,14 +1,23 @@ """Domain.""" # standard +from pathlib import Path import re # local from .utils import validator +def _iana_tld(): + """Load IANA TLDs.""" + with Path(__file__).parent.joinpath("_tld.txt").open() as tld_f: + return (line.strip() for line in tld_f.readlines()[1:]) + + @validator -def domain(value: str, /, *, rfc_1034: bool = False, rfc_2782: bool = False): +def domain( + value: str, /, *, consider_tld: bool = False, rfc_1034: bool = False, rfc_2782: bool = False +): """Return whether or not given value is a valid domain. Examples: @@ -23,6 +32,8 @@ def domain(value: str, /, *, rfc_1034: bool = False, rfc_2782: bool = False): Args: value: Domain string to validate. + consider_tld: + Restrict domain to TLDs allowed by IANA. rfc_1034: Allows optional trailing dot in the domain name. Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034). @@ -41,6 +52,10 @@ def domain(value: str, /, *, rfc_1034: bool = False, rfc_2782: bool = False): """ if not value: return False + + if consider_tld and value.rstrip(".").rsplit(".", 1)[-1].upper() not in _iana_tld(): + return False + try: return not re.search(r"\s", value) and re.match( # First character of the domain diff --git a/src/validators/hostname.py b/src/validators/hostname.py index 10450585..7ad634ae 100644 --- a/src/validators/hostname.py +++ b/src/validators/hostname.py @@ -55,6 +55,7 @@ def hostname( skip_ipv4_addr: bool = False, may_have_port: bool = True, maybe_simple: bool = True, + consider_tld: bool = False, private: Optional[bool] = None, # only for ip-addresses rfc_1034: bool = False, rfc_2782: bool = False, @@ -94,6 +95,8 @@ def hostname( Hostname string may contain port number. maybe_simple: Hostname string maybe only hyphens and alpha-numerals. + consider_tld: + Restrict domain to TLDs allowed by IANA. private: Embedded IP address is public if `False`, private/local if `True`. rfc_1034: @@ -113,14 +116,14 @@ def hostname( if may_have_port and (host_seg := _port_validator(value)): return ( (_simple_hostname_regex().match(host_seg) if maybe_simple else False) - or domain(host_seg, rfc_1034=rfc_1034, rfc_2782=rfc_2782) + or domain(host_seg, consider_tld=consider_tld, rfc_1034=rfc_1034, rfc_2782=rfc_2782) or (False if skip_ipv4_addr else ipv4(host_seg, cidr=False, private=private)) or (False if skip_ipv6_addr else ipv6(host_seg, cidr=False)) ) return ( (_simple_hostname_regex().match(value) if maybe_simple else False) - or domain(value, rfc_1034=rfc_1034, rfc_2782=rfc_2782) + or domain(value, consider_tld=consider_tld, rfc_1034=rfc_1034, rfc_2782=rfc_2782) or (False if skip_ipv4_addr else ipv4(value, cidr=False, private=private)) or (False if skip_ipv6_addr else ipv6(value, cidr=False)) ) diff --git a/src/validators/url.py b/src/validators/url.py index 38d898ec..43cf0561 100644 --- a/src/validators/url.py +++ b/src/validators/url.py @@ -81,6 +81,7 @@ def _validate_netloc( skip_ipv4_addr: bool, may_have_port: bool, simple_host: bool, + consider_tld: bool, private: Optional[bool], rfc_1034: bool, rfc_2782: bool, @@ -99,6 +100,7 @@ def _validate_netloc( skip_ipv4_addr=skip_ipv4_addr, may_have_port=may_have_port, maybe_simple=simple_host, + consider_tld=consider_tld, private=private, rfc_1034=rfc_1034, rfc_2782=rfc_2782, @@ -114,6 +116,7 @@ def _validate_netloc( skip_ipv4_addr=skip_ipv4_addr, may_have_port=may_have_port, maybe_simple=simple_host, + consider_tld=consider_tld, private=private, rfc_1034=rfc_1034, rfc_2782=rfc_2782, @@ -155,6 +158,7 @@ def url( may_have_port: bool = True, simple_host: bool = False, strict_query: bool = True, + consider_tld: bool = False, private: Optional[bool] = None, # only for ip-addresses rfc_1034: bool = False, rfc_2782: bool = False, @@ -196,6 +200,8 @@ def url( URL string maybe only hyphens and alpha-numerals. strict_query: Fail validation on query string parsing error. + consider_tld: + Restrict domain to TLDs allowed by IANA. private: Embedded IP address is public if `False`, private/local if `True`. rfc_1034: @@ -227,6 +233,7 @@ def url( skip_ipv4_addr, may_have_port, simple_host, + consider_tld, private, rfc_1034, rfc_2782, diff --git a/tests/test_domain.py b/tests/test_domain.py index c8f5c4d9..21aadf90 100644 --- a/tests/test_domain.py +++ b/tests/test_domain.py @@ -32,6 +32,28 @@ def test_returns_true_on_valid_domain(value: str, rfc_1034: bool, rfc_2782: bool assert domain(value, rfc_1034=rfc_1034, rfc_2782=rfc_2782) +@pytest.mark.parametrize( + ("value", "consider_tld", "rfc_1034", "rfc_2782"), + [ + ("example.com", True, False, False), + ("exa_mple.com", True, False, True), + ("xn----gtbspbbmkef.xn--p1ai", True, False, False), + ("underscore_subdomain.example.com", True, False, True), + ("someThing.versicherung.", True, True, False), + ("11.com", True, False, False), + ("3.cn.", True, True, False), + ("_example.com", True, False, True), + ("example_.com", True, False, True), + ("somerandomexample.xn--fiqs8s", True, False, False), + ], +) +def test_returns_true_on_valid_top_level_domain( + value: str, consider_tld: bool, rfc_1034: bool, rfc_2782: bool +): + """Test returns true on valid top level domain.""" + assert domain(value, consider_tld=consider_tld, rfc_1034=rfc_1034, rfc_2782=rfc_2782) + + @pytest.mark.parametrize( ("value", "rfc_1034", "rfc_2782"), [ @@ -55,3 +77,28 @@ def test_returns_true_on_valid_domain(value: str, rfc_1034: bool, rfc_2782: bool def test_returns_failed_validation_on_invalid_domain(value: str, rfc_1034: bool, rfc_2782: bool): """Test returns failed validation on invalid domain.""" assert isinstance(domain(value, rfc_1034=rfc_1034, rfc_2782=rfc_2782), ValidationError) + + +@pytest.mark.parametrize( + ("value", "consider_tld", "rfc_1034", "rfc_2782"), + [ + ("example.266", True, False, False), + ("exa_mple.org_", True, False, True), + ("xn----gtbspbbmkef.xn-p1ai", True, False, False), + ("underscore_subdomain.example.flat", True, False, True), + ("someThing.versicherung.reddit.", True, True, False), + ("11.twitter", True, False, False), + ("3.cnx.", True, True, False), + ("_example.#13", True, False, True), + ("example_.fo-ul", True, False, True), + ("somerandomexample.xn-n-fiqs8s", True, False, False), + ], +) +def test_returns_failed_validation_invalid_top_level_domain( + value: str, consider_tld: bool, rfc_1034: bool, rfc_2782: bool +): + """Test returns failed validation invalid top level domain.""" + assert isinstance( + domain(value, consider_tld=consider_tld, rfc_1034=rfc_1034, rfc_2782=rfc_2782), + ValidationError, + )