From 22a4853bb40f2aae01e5aca4067fe219e564f615 Mon Sep 17 00:00:00 2001 From: ReversingLabs <55623149+rl-devops@users.noreply.github.com> Date: Fri, 27 Jan 2023 14:14:39 +0100 Subject: [PATCH 1/8] add central compiled regex pool --- testdata/example.com/nameservers | 2 +- testdata/example.org/input | 2 +- testdata/example.org/nameservers | 2 +- testdata/google.com/input | 6 ++--- testdata/hello.xyz/input | 44 ++++++++++++++++++++++++++++++++ testdata/hello.xyz/nameservers | 2 ++ testdata/hello.xyz/output | 15 +++++++++++ testdata/make_testdata.sh | 1 + testdata/meta.co.jp/nameservers | 2 +- testdata/meta.co.jp/output | 2 +- testdata/meta.co.uk/input | 2 +- testdata/meta.co.uk/nameservers | 2 +- testdata/meta.co.uk/output | 6 ++--- testdata/meta.com.sg/output | 2 +- testdata/meta.com.tr/input | 2 +- testdata/meta.com.tr/output | 2 +- testdata/meta.com/input | 6 ++--- testdata/meta.com/nameservers | 4 +-- testdata/meta.com/output | 2 +- testdata/meta.kr/nameservers | 6 ++--- testdata/meta.kr/output | 2 +- whois/_0_init_tld.py | 35 ++++++++++++++++++++++++- 22 files changed, 122 insertions(+), 27 deletions(-) create mode 100644 testdata/hello.xyz/input create mode 100644 testdata/hello.xyz/nameservers create mode 100644 testdata/hello.xyz/output diff --git a/testdata/example.com/nameservers b/testdata/example.com/nameservers index 2049f2c..3ebd9d0 100644 --- a/testdata/example.com/nameservers +++ b/testdata/example.com/nameservers @@ -1,2 +1,2 @@ -example.com name server a.iana-servers.net. example.com name server b.iana-servers.net. +example.com name server a.iana-servers.net. 
diff --git a/testdata/example.org/input b/testdata/example.org/input index 675107e..b028563 100644 --- a/testdata/example.org/input +++ b/testdata/example.org/input @@ -58,7 +58,7 @@ Name Server: a.iana-servers.net Name Server: b.iana-servers.net DNSSEC: signedDelegation URL of the ICANN Whois Inaccuracy Complaint Form: https://www.icann.org/wicf/ ->>> Last update of WHOIS database: 2023-01-04T15:03:07Z <<< +>>> Last update of WHOIS database: 2023-01-27T13:06:45Z <<< For more information on Whois status codes, please visit https://icann.org/epp diff --git a/testdata/example.org/nameservers b/testdata/example.org/nameservers index bc6dee9..0a6aff5 100644 --- a/testdata/example.org/nameservers +++ b/testdata/example.org/nameservers @@ -1,2 +1,2 @@ -example.org name server b.iana-servers.net. example.org name server a.iana-servers.net. +example.org name server b.iana-servers.net. diff --git a/testdata/google.com/input b/testdata/google.com/input index 02c74cd..114b1eb 100644 --- a/testdata/google.com/input +++ b/testdata/google.com/input @@ -32,12 +32,12 @@ Tech State/Province: CA Tech Country: US Tech Email: Select Request Email Form at https://domains.markmonitor.com/whois/google.com Name Server: ns1.google.com -Name Server: ns2.google.com -Name Server: ns3.google.com Name Server: ns4.google.com +Name Server: ns3.google.com +Name Server: ns2.google.com DNSSEC: unsigned URL of the ICANN WHOIS Data Problem Reporting System: http://wdprs.internic.net/ ->>> Last update of WHOIS database: 2023-01-04T14:55:04+0000 <<< +>>> Last update of WHOIS database: 2023-01-27T13:04:53+0000 <<< For more information on WHOIS status codes, please visit: https://www.icann.org/resources/pages/epp-status-codes diff --git a/testdata/hello.xyz/input b/testdata/hello.xyz/input new file mode 100644 index 0000000..4370d35 --- /dev/null +++ b/testdata/hello.xyz/input @@ -0,0 +1,44 @@ +[Querying whois.nic.xyz] +[whois.nic.xyz] +Domain Name: HELLO.XYZ +Registry Domain ID: D2208533-CNIC +Registrar 
WHOIS Server: whois.namecheap.com +Registrar URL: https://namecheap.com +Updated Date: 2022-03-14T11:17:22.0Z +Creation Date: 2014-03-20T15:01:22.0Z +Registry Expiry Date: 2023-03-20T23:59:59.0Z +Registrar: Namecheap +Registrar IANA ID: 1068 +Domain Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited +Registrant Organization: Privacy service provided by Withheld for Privacy ehf +Registrant State/Province: Capital Region +Registrant Country: IS +Registrant Email: Please query the RDDS service of the Registrar of Record identified in this output for information on how to contact the Registrant, Admin, or Tech contact of the queried domain name. +Admin Email: Please query the RDDS service of the Registrar of Record identified in this output for information on how to contact the Registrant, Admin, or Tech contact of the queried domain name. +Tech Email: Please query the RDDS service of the Registrar of Record identified in this output for information on how to contact the Registrant, Admin, or Tech contact of the queried domain name. +Name Server: DNS1.REGISTRAR-SERVERS.COM +Name Server: DNS2.REGISTRAR-SERVERS.COM +DNSSEC: unsigned +Billing Email: Please query the RDDS service of the Registrar of Record identified in this output for information on how to contact the Registrant, Admin, or Tech contact of the queried domain name. +Registrar Abuse Contact Email: abuse@namecheap.com +Registrar Abuse Contact Phone: +1.9854014545 +URL of the ICANN Whois Inaccuracy Complaint Form: https://www.icann.org/wicf/ +>>> Last update of WHOIS database: 2023-01-27T13:06:56.0Z <<< + +For more information on Whois status codes, please visit https://icann.org/epp + +>>> IMPORTANT INFORMATION ABOUT THE DEPLOYMENT OF RDAP: please visit +https://www.centralnic.com/support/rdap <<< + +The Whois and RDAP services are provided by CentralNic, and contain +information pertaining to Internet domain names registered by our +our customers. 
By using this service you are agreeing (1) not to use any +information presented here for any purpose other than determining +ownership of domain names, (2) not to store or reproduce this data in +any way, (3) not to use any high-volume, automated, electronic processes +to obtain data from this service. Abuse of this service is monitored and +actions in contravention of these terms will result in being permanently +blacklisted. All data is (c) CentralNic Ltd (https://www.centralnic.com) + +Access to the Whois and RDAP services is rate limited. For more +information, visit https://registrar-console.centralnic.com/pub/whois_guidance. diff --git a/testdata/hello.xyz/nameservers b/testdata/hello.xyz/nameservers new file mode 100644 index 0000000..30f236e --- /dev/null +++ b/testdata/hello.xyz/nameservers @@ -0,0 +1,2 @@ +hello.xyz name server dns1.registrar-servers.com. +hello.xyz name server dns2.registrar-servers.com. diff --git a/testdata/hello.xyz/output b/testdata/hello.xyz/output new file mode 100644 index 0000000..4da00a0 --- /dev/null +++ b/testdata/hello.xyz/output @@ -0,0 +1,15 @@ + +test domain: <<<<<<<<<< hello.xyz >>>>>>>>>>>>>>>>>>>> +name str 'hello.xyz' +tld str 'xyz' +registrar str 'Namecheap' +registrant_country str 'IS' +creation_date datetime.datetime 2014-03-20 15:01:22 +expiration_date datetime.datetime 2023-03-20 23:59:59 +last_updated datetime.datetime 2022-03-14 11:17:22 +status str 'clientTransferProhibited https://icann.org/epp#clientTransferProhibited' +statuses list ['clientTransferProhibited https://icann.org/epp#clientTransferProhibited'] +dnssec bool False +name_servers list ['dns1.registrar-servers.com', 'dns2.registrar-servers.com'] +registrant str 'Privacy service provided by Withheld for Privacy ehf' +emails list ['abuse@namecheap.com'] diff --git a/testdata/make_testdata.sh b/testdata/make_testdata.sh index 7e82154..377c0b6 100755 --- a/testdata/make_testdata.sh +++ b/testdata/make_testdata.sh @@ -15,6 +15,7 @@ getDomains() 
meta.co.jp # jp has [registrar] type keywords not registrar: meta.kr # has both korean and english text meta.com.tr # has utf 8 response text and different formatting style + hello.xyz # has sometimes IANA Source beginning on mac ) } diff --git a/testdata/meta.co.jp/nameservers b/testdata/meta.co.jp/nameservers index cf933f8..e88ffcc 100644 --- a/testdata/meta.co.jp/nameservers +++ b/testdata/meta.co.jp/nameservers @@ -1,3 +1,3 @@ meta.co.jp name server ns2.meta.co.jp. -meta.co.jp name server ns2.sphere.ad.jp. meta.co.jp name server ns.meta.co.jp. +meta.co.jp name server ns2.sphere.ad.jp. diff --git a/testdata/meta.co.jp/output b/testdata/meta.co.jp/output index 46a4333..5bfd789 100644 --- a/testdata/meta.co.jp/output +++ b/testdata/meta.co.jp/output @@ -1,7 +1,7 @@ test domain: <<<<<<<<<< meta.co.jp >>>>>>>>>>>>>>>>>>>> name str 'meta.co.jp' -tld str 'co_jp' +tld str 'co.jp' registrar str 'JPRS' registrant_country str '' creation_date datetime.datetime 1994-04-01 00:00:00 diff --git a/testdata/meta.co.uk/input b/testdata/meta.co.uk/input index 230f3cd..e6d34e8 100644 --- a/testdata/meta.co.uk/input +++ b/testdata/meta.co.uk/input @@ -31,7 +31,7 @@ c.ns.facebook.com d.ns.facebook.com - WHOIS lookup made at 15:03:07 04-Jan-2023 + WHOIS lookup made at 13:06:46 27-Jan-2023 -- This WHOIS information is provided for free by Nominet UK the central registry diff --git a/testdata/meta.co.uk/nameservers b/testdata/meta.co.uk/nameservers index c4cff21..982a5d1 100644 --- a/testdata/meta.co.uk/nameservers +++ b/testdata/meta.co.uk/nameservers @@ -1,4 +1,4 @@ +meta.co.uk name server c.ns.facebook.com. meta.co.uk name server d.ns.facebook.com. meta.co.uk name server a.ns.facebook.com. -meta.co.uk name server c.ns.facebook.com. meta.co.uk name server b.ns.facebook.com. 
diff --git a/testdata/meta.co.uk/output b/testdata/meta.co.uk/output index 2816f17..36fbd5a 100644 --- a/testdata/meta.co.uk/output +++ b/testdata/meta.co.uk/output @@ -1,7 +1,7 @@ test domain: <<<<<<<<<< meta.co.uk >>>>>>>>>>>>>>>>>>>> name str 'meta.co.uk' -tld str 'co_uk' +tld str 'co.uk' registrar str 'Hogan Lovells International LLP [Tag = LOVELLSLLP]' registrant_country str '' creation_date datetime.datetime 2001-11-01 00:00:00 @@ -10,7 +10,7 @@ last_updated datetime.datetime 2022-07-28 00:00:00 status str 'Registered until expiry date.' statuses list ['Registered until expiry date.'] dnssec bool False -name_servers list ['a.ns.facebook.com', 'b.ns.facebook.com'] +name_servers list ['a.ns.facebook.com', 'b.ns.facebook.com', 'c.ns.facebook.com', 'd.ns.facebook.com'] owner str '' -registrant str '' +registrant str 'Meta Platforms, Inc.' emails list [''] diff --git a/testdata/meta.com.sg/output b/testdata/meta.com.sg/output index 0a7a7fa..186c138 100644 --- a/testdata/meta.com.sg/output +++ b/testdata/meta.com.sg/output @@ -1,7 +1,7 @@ test domain: <<<<<<<<<< meta.com.sg >>>>>>>>>>>>>>>>>>>> name str 'meta.com.sg' -tld str 'com_sg' +tld str 'com.sg' registrar str 'SINGNET PTE LTD' registrant_country str '' creation_date datetime.datetime 1998-12-03 17:04:50 diff --git a/testdata/meta.com.tr/input b/testdata/meta.com.tr/input index 7434ed4..c578d8b 100644 --- a/testdata/meta.com.tr/input +++ b/testdata/meta.com.tr/input @@ -33,4 +33,4 @@ Expires on..............: 2026-Dec-27. ** Whois Server: -Last Update Time: 2023-01-04T18:01:59+03:00 +Last Update Time: 2023-01-27T16:04:30+03:00 diff --git a/testdata/meta.com.tr/output b/testdata/meta.com.tr/output index 95e3886..9243bc3 100644 --- a/testdata/meta.com.tr/output +++ b/testdata/meta.com.tr/output @@ -1,7 +1,7 @@ test domain: <<<<<<<<<< meta.com.tr >>>>>>>>>>>>>>>>>>>> name str 'meta.com.tr' -tld str 'com_tr' +tld str 'com.tr' registrar str 'ODTÜ GELİŞTİRME VAKFI BİLGİ TEKNOLOJİLERİ SAN. VE TİC. A.Ş.' 
registrant_country str '' creation_date datetime.datetime 2006-12-28 00:00:00 diff --git a/testdata/meta.com/input b/testdata/meta.com/input index f8d6617..981b878 100644 --- a/testdata/meta.com/input +++ b/testdata/meta.com/input @@ -6,9 +6,9 @@ Domain Name: META.COM Registry Domain ID: 1433704_DOMAIN_COM-VRSN Registrar WHOIS Server: whois.registrarsafe.com Registrar URL: https://www.registrarsafe.com -Updated Date: 2022-07-27T19:07:55Z +Updated Date: 2023-01-25T20:09:06Z Creation Date: 1991-01-21T05:00:00Z -Registrar Registration Expiration Date: 2031-01-22T05:00:00Z +Registrar Registration Expiration Date: 2032-01-22T05:00:00Z Registrar: RegistrarSafe, LLC Registrar IANA ID: 3237 Registrar Abuse Contact Email: abusecomplaints@registrarsafe.com @@ -64,7 +64,7 @@ Name Server: A.NS.FACEBOOK.COM Name Server: D.NS.FACEBOOK.COM DNSSEC: unsigned URL of the ICANN WHOIS Data Problem Reporting System: http://wdprs.internic.net/ ->>> Last update of WHOIS database: 2023-01-04T15:03:11Z <<< +>>> Last update of WHOIS database: 2023-01-27T13:06:50Z <<< Search results obtained from the RegistrarSafe, LLC WHOIS database are provided by RegistrarSafe, LLC for information purposes only, to assist diff --git a/testdata/meta.com/nameservers b/testdata/meta.com/nameservers index 75d7b5c..175b098 100644 --- a/testdata/meta.com/nameservers +++ b/testdata/meta.com/nameservers @@ -1,4 +1,4 @@ -meta.com name server a.ns.facebook.com. +meta.com name server b.ns.facebook.com. meta.com name server d.ns.facebook.com. meta.com name server c.ns.facebook.com. -meta.com name server b.ns.facebook.com. +meta.com name server a.ns.facebook.com. 
diff --git a/testdata/meta.com/output b/testdata/meta.com/output index 65ba15c..4a59648 100644 --- a/testdata/meta.com/output +++ b/testdata/meta.com/output @@ -6,7 +6,7 @@ registrar str 'RegistrarSafe, LLC' registrant_country str 'US' creation_date datetime.datetime 1991-01-21 05:00:00 expiration_date NoneType None -last_updated datetime.datetime 2022-07-27 19:07:55 +last_updated datetime.datetime 2023-01-25 20:09:06 status str 'clientDeleteProhibited https://www.icann.org/epp#clientDeleteProhibited' statuses list ['clientDeleteProhibited https://www.icann.org/epp#clientDeleteProhibited', 'clientTransferProhibited https://www.icann.org/epp#clientTransferProhibited', 'clientUpdateProhibited https://www.icann.org/epp#clientUpdateProhibited', 'serverDeleteProhibited https://www.icann.org/epp#serverDeleteProhibited', 'serverTransferProhibited https://www.icann.org/epp#serverTransferProhibited', 'serverUpdateProhibited https://www.icann.org/epp#serverUpdateProhibited'] dnssec bool False diff --git a/testdata/meta.kr/nameservers b/testdata/meta.kr/nameservers index e8b5d05..69e42b5 100644 --- a/testdata/meta.kr/nameservers +++ b/testdata/meta.kr/nameservers @@ -1,4 +1,4 @@ -meta.kr name server ns4.lovellsnames.org. -meta.kr name server ns3.lovellsnames.org. -meta.kr name server ns2.lovellsnames.org. meta.kr name server ns1.lovellsnames.org. +meta.kr name server ns2.lovellsnames.org. +meta.kr name server ns3.lovellsnames.org. +meta.kr name server ns4.lovellsnames.org. 
diff --git a/testdata/meta.kr/output b/testdata/meta.kr/output index 902e653..d6879c5 100644 --- a/testdata/meta.kr/output +++ b/testdata/meta.kr/output @@ -10,6 +10,6 @@ last_updated datetime.datetime 2022-04-05 00:00:00 status str '' statuses list [''] dnssec bool False -name_servers list [] +name_servers list ['ns1.lovellsnames.org', 'ns2.lovellsnames.org', 'ns3.lovellsnames.org', 'ns4.lovellsnames.org'] registrant str 'Gabia C&S' emails list ['domreg@101domain.com'] diff --git a/whois/_0_init_tld.py b/whois/_0_init_tld.py index f7534ed..13c018c 100644 --- a/whois/_0_init_tld.py +++ b/whois/_0_init_tld.py @@ -14,6 +14,7 @@ Verbose = False TLD_RE: Dict[str, Any] = {} +REG_COLLECTION_BY_KEY: Dict = {} def validTlds(): @@ -70,7 +71,9 @@ def get_tld_re(tld: str, override: bool = False) -> Any: # we want now to exclude _server hints tld_re = dict( - (k, re.compile(v, re.IGNORECASE) if (isinstance(v, str) and k[0] != "_") else v) for k, v in tmp.items() + # (k, re.compile(v, re.IGNORECASE) if (isinstance(v, str) and k[0] != "_") else v) for k, v in tmp.items() + # dont recompile each re by themselves, reuse existing compiled re + (k, REG_COLLECTION_BY_KEY[k][v] if (isinstance(v, str) and k[0] != "_") else v) for k, v in tmp.items() ) # meta domains start with _: examples _centralnic and _donuts @@ -109,11 +112,41 @@ def initOne(tld, override: bool = False): if Verbose: print(f"{tld} -> {tld2}", file=sys.stderr) +def buildRegCollection(zz: Dict): + regCollection = {} + # get all regexes + for name in zz: + # print(name) + z = zz[name] + for key in z: + if key is None: + continue + + if key.startswith("_"): + continue + + if key in ["extend"]: + continue + + if key not in regCollection: + regCollection[key] = {} + + reg = z[key] + if reg is None: + continue + + regCollection[key][reg] = None + if isinstance(reg, str): + regCollection[key][reg] = re.compile(reg, flags=re.IGNORECASE) + + return regCollection def initOnImport(): + global REG_COLLECTION_BY_KEY # here we run 
the import processing # we load all tld's on import so we dont lose time later # we keep ZZ so we can later reuse it if we want to aoverrid or update tld's + REG_COLLECTION_BY_KEY= buildRegCollection(ZZ) override = False for tld in ZZ.keys(): initOne(tld, override) From a37d4746e059ce6b9a1a62c6e417fd825ffe676b Mon Sep 17 00:00:00 2001 From: ReversingLabs <55623149+rl-devops@users.noreply.github.com> Date: Fri, 27 Jan 2023 14:25:39 +0100 Subject: [PATCH 2/8] update readme and run test set ./test2.py -a --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 561a0d8..9a717a4 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,7 @@ Raise an issue https://github.com/DannyCork/python-whois/issues/new 2023-01-27: maarten_boot * add autodetect via iana tld file (this has only tld's) + * add a central collection of all compiled regexes and reuse them: REG_COLLECTION_BY_KEY in _0_init_tld.py ## Support * Python 3.x is supported. From aa72e28f3235151ad7a2d7b49e93b00c80d6034e Mon Sep 17 00:00:00 2001 From: ReversingLabs <55623149+rl-devops@users.noreply.github.com> Date: Fri, 27 Jan 2023 14:41:06 +0100 Subject: [PATCH 3/8] add addition strings for None output: domain does not exist --- whois/_0_init_tld.py | 7 +++++-- whois/_2_parse.py | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/whois/_0_init_tld.py b/whois/_0_init_tld.py index 13c018c..368b53d 100644 --- a/whois/_0_init_tld.py +++ b/whois/_0_init_tld.py @@ -73,7 +73,8 @@ def get_tld_re(tld: str, override: bool = False) -> Any: tld_re = dict( # (k, re.compile(v, re.IGNORECASE) if (isinstance(v, str) and k[0] != "_") else v) for k, v in tmp.items() # dont recompile each re by themselves, reuse existing compiled re - (k, REG_COLLECTION_BY_KEY[k][v] if (isinstance(v, str) and k[0] != "_") else v) for k, v in tmp.items() + (k, REG_COLLECTION_BY_KEY[k][v] if (isinstance(v, str) and k[0] != "_") else v) + for k, v in tmp.items() ) # meta domains start with _: examples 
_centralnic and _donuts @@ -112,6 +113,7 @@ def initOne(tld, override: bool = False): if Verbose: print(f"{tld} -> {tld2}", file=sys.stderr) + def buildRegCollection(zz: Dict): regCollection = {} # get all regexes @@ -141,12 +143,13 @@ def buildRegCollection(zz: Dict): return regCollection + def initOnImport(): global REG_COLLECTION_BY_KEY # here we run the import processing # we load all tld's on import so we dont lose time later # we keep ZZ so we can later reuse it if we want to aoverrid or update tld's - REG_COLLECTION_BY_KEY= buildRegCollection(ZZ) + REG_COLLECTION_BY_KEY = buildRegCollection(ZZ) override = False for tld in ZZ.keys(): initOne(tld, override) diff --git a/whois/_2_parse.py b/whois/_2_parse.py index fc95cb5..9fd9ead 100644 --- a/whois/_2_parse.py +++ b/whois/_2_parse.py @@ -86,6 +86,8 @@ def handleShortResponse( # NOTE: from here s is lowercase only # --------------------------------- noneStrings = [ + "the domain has not been registered", + "no match found for", "no matching record", "not found", "no data found", @@ -98,6 +100,7 @@ def handleShortResponse( "no whois server is known for this kind of object", "nameserver not found", "malformed request", # this means this domain is not in whois as it is on top of a registered domain + "no match", ] for i in noneStrings: From d550fe864c31a3b3de8070e1f3cd2755abbb3f0a Mon Sep 17 00:00:00 2001 From: ReversingLabs <55623149+rl-devops@users.noreply.github.com> Date: Fri, 27 Jan 2023 14:46:18 +0100 Subject: [PATCH 4/8] update readme; mention refresh testdata after switch to real tld with dot --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 9a717a4..2ad7fa9 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,8 @@ Raise an issue https://github.com/DannyCork/python-whois/issues/new 2023-01-27: maarten_boot * add autodetect via iana tld file (this has only tld's) * add a central collection of all compiled regexes and reuse them: REG_COLLECTION_BY_KEY in 
_0_init_tld.py + * refresh testdata now that tld has dot instead of _ if more than one level + * add additional strings meaning domain does not exist ## Support * Python 3.x is supported. From 978e5d4a32a696f63983b68a932b8891b2f5e700 Mon Sep 17 00:00:00 2001 From: ReversingLabs <55623149+rl-devops@users.noreply.github.com> Date: Mon, 30 Jan 2023 11:14:46 +0100 Subject: [PATCH 5/8] add extra verbose; fix detection of second level tld; add get_last_raw_whois_data() to show all tries by domain --- whois/_0_init_tld.py | 4 ++++ whois/_1_query.py | 10 ++++++++++ whois/__init__.py | 27 +++++++++++++++++++++++++-- whois/tld_regexpr.py | 5 ++++- 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/whois/_0_init_tld.py b/whois/_0_init_tld.py index 368b53d..092383a 100644 --- a/whois/_0_init_tld.py +++ b/whois/_0_init_tld.py @@ -137,6 +137,10 @@ def buildRegCollection(zz: Dict): if reg is None: continue + if reg in regCollection[key] and regCollection[key][reg] is not None: + # we already have a compiled regex, no need to do it again + continue + regCollection[key][reg] = None if isinstance(reg, str): regCollection[key][reg] = re.compile(reg, flags=re.IGNORECASE) diff --git a/whois/_1_query.py b/whois/_1_query.py index a3a9b06..1f7cbd0 100644 --- a/whois/_1_query.py +++ b/whois/_1_query.py @@ -50,11 +50,16 @@ def do_query( k = ".".join(dl) if cache_file: + if verbose: + print(f"using cache file: {cache_file}", file=sys.stderr) cache_load(cache_file) # actually also whois uses cache, so if you really dont want to use cache # you should also pass the --force-lookup flag (on linux) if force or k not in CACHE or CACHE[k][0] < time.time() - cache_age: + if verbose: + print(f"force = {force}", file=sys.stderr) + # slow down before so we can force individual domains at a slower tempo if slow_down: time.sleep(slow_down) @@ -159,6 +164,8 @@ def _do_whois_query( return testWhoisPythonFromStaticTestData(dl, ignore_returncode, server, verbose) cmd = makeWhoisCommandToRun(dl,
server, verbose) + if verbose: + print(cmd, file=sys.stderr) # LANG=en is added to make the ".jp" output consist across all environments p = subprocess.Popen( @@ -169,6 +176,9 @@ def _do_whois_query( ) r = p.communicate()[0].decode(errors="ignore") + if verbose: + print(r, file=sys.stderr) + if ignore_returncode is False and p.returncode not in [0, 1]: raise WhoisCommandFailed(r) diff --git a/whois/__init__.py b/whois/__init__.py index 30121a2..674f0b4 100644 --- a/whois/__init__.py +++ b/whois/__init__.py @@ -77,7 +77,11 @@ def _inner(*args, **kw): return _inner -def fromDomainStringToTld(domain: str, internationalized: bool, verbose: bool = False): +def fromDomainStringToTld( + domain: str, + internationalized: bool, + verbose: bool = False, +): domain = domain.lower().strip().rstrip(".") # Remove the trailing dot to support FQDN. d = domain.split(".") if verbose: @@ -90,6 +94,8 @@ def fromDomainStringToTld(domain: str, internationalized: bool, verbose: bool = return None, None tld = filterTldToSupportedPattern(domain, d, verbose) + if verbose: + print(f"filterTldToSupportedPattern returns tld: {tld}", file=sys.stderr) if internationalized and isinstance(internationalized, bool): d = internationalizedDomainNameToPunyCode(d) @@ -177,6 +183,16 @@ def doUnsupportedTldAnyway( ) +LastWhois: Dict = { + "Try": [], +} + + +def get_last_raw_whois_data(): + global LastWhois + return LastWhois + + def query( domain: str, force: bool = False, @@ -209,6 +225,7 @@ def query( return_raw_text_for_unsupported_tld: if the tld is unsupported, just try it anyway but return only the raw text. """ + global LastWhois assert isinstance(domain, str), Exception("`domain` - must be ") return_raw_text_for_unsupported_tld = bool(return_raw_text_for_unsupported_tld) @@ -240,7 +257,7 @@ def query( # but if the tld is yyy.zzz we should only try xxx.yyy.zzz cache_file = cache_file or CACHE_FILE - tldLevel = tld.split("_") # note while the top level domain may have a . 
the tld has a _ ( co.uk becomes co_uk ) + tldLevel = tld.split(".") # note the tld now keeps its dots ( co.uk stays co.uk ), so split on "." while 1: whois_str = do_query( dl=dl, @@ -252,6 +269,12 @@ def query( server=server, verbose=verbose, ) + tryMe = { + "Domain": ".".join(dl), + "rawData": whois_str, + "server": server, + } + LastWhois["Try"].append(tryMe) data = do_parse( whois_str=whois_str, diff --git a/whois/tld_regexpr.py b/whois/tld_regexpr.py index 5f0b28d..fb6d17e 100644 --- a/whois/tld_regexpr.py +++ b/whois/tld_regexpr.py @@ -19,7 +19,7 @@ # Commercial TLD - Original Big 7 ZZ["com"] = { "extend": None, - "domain_name": r"Domain Name:\s?(.+)", + "domain_name": r"Domain Name\s*:\s*(.+)", "registrar": r"Registrar:\s?(.+)", "registrant": r"Registrant\s*Organi(?:s|z)ation:\s?(.+)", "registrant_country": r"Registrant Country:\s?(.+)", @@ -266,6 +266,9 @@ "creation_date": r"Registration Time:\s?(.+)", "expiration_date": r"Expiration Time:\s?(.+)", } +ZZ["com.cn"] = { + "extend": "cn", +} ZZ["co"] = { "extend": "biz", From 1ee59dd69dc7585f3f13a16c68001eca73dbf9a9 Mon Sep 17 00:00:00 2001 From: ReversingLabs <55623149+rl-devops@users.noreply.github.com> Date: Mon, 30 Jan 2023 11:35:48 +0100 Subject: [PATCH 6/8] add tld tm (Turkmenistan) --- whois/tld_regexpr.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/whois/tld_regexpr.py b/whois/tld_regexpr.py index fb6d17e..ef033b9 100644 --- a/whois/tld_regexpr.py +++ b/whois/tld_regexpr.py @@ -2661,3 +2661,12 @@ ZZ["softbank"] = {"_server": "whois.nic.softbank", "extend": "com"} # auto-detected via IANA tld ZZ["gov"] = {"extend": "com"} # only 2 or 3 fields are actually returned + +ZZ["tm"] = { # Turkmenistan + "extend": "com", + "domain_name": r"Domain\s*:\s*(.+)", + "expiration_date": r"Expiry\s*:\s*(\d+-\d+-\d+)", + "name_servers": r"NS\s+\d+\s+:\s*(\S+)", + "status": r"Status\s*:\s*(.+)", + +} From 30e6104aedb9c5364cf381e7148e93c129e3005f Mon Sep 17 00:00:00 2001 From: ReversingLabs
<55623149+rl-devops@users.noreply.github.com> Date: Mon, 30 Jan 2023 11:36:07 +0100 Subject: [PATCH 7/8] reformat --- whois/tld_regexpr.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/whois/tld_regexpr.py b/whois/tld_regexpr.py index ef033b9..5528b4b 100644 --- a/whois/tld_regexpr.py +++ b/whois/tld_regexpr.py @@ -2662,11 +2662,10 @@ ZZ["gov"] = {"extend": "com"} # only 2 or 3 fields are actually returned -ZZ["tm"] = { # Turkmenistan +ZZ["tm"] = { # Turkmenistan "extend": "com", "domain_name": r"Domain\s*:\s*(.+)", "expiration_date": r"Expiry\s*:\s*(\d+-\d+-\d+)", "name_servers": r"NS\s+\d+\s+:\s*(\S+)", "status": r"Status\s*:\s*(.+)", - } From ee42fc02e14301f09a1a5a85390acd6d7f720c2e Mon Sep 17 00:00:00 2001 From: ReversingLabs <55623149+rl-devops@users.noreply.github.com> Date: Mon, 30 Jan 2023 11:39:39 +0100 Subject: [PATCH 8/8] add com.tm as valid second level --- whois/tld_regexpr.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/whois/tld_regexpr.py b/whois/tld_regexpr.py index 5528b4b..be53050 100644 --- a/whois/tld_regexpr.py +++ b/whois/tld_regexpr.py @@ -2669,3 +2669,5 @@ "name_servers": r"NS\s+\d+\s+:\s*(\S+)", "status": r"Status\s*:\s*(.+)", } + +ZZ["com.tm"] = {"extend": "tm"}