Merge pull request #1488 from yoonthegoon/MyPy-CI-failures
L1ghtn1ng authored Jul 25, 2023
2 parents e1b97fc + 3aa25f6 commit 4f0e3bc
Showing 16 changed files with 949 additions and 554 deletions.
2 changes: 1 addition & 1 deletion .flake8
@@ -1,2 +1,2 @@
 [flake8]
-ignore = E501, F405, F403, F401, E402
+ignore = E203, E501, F405, F403, F401, E402, W503
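E203 (whitespace before ':') and W503 (line break before a binary operator) are the two pycodestyle checks conventionally disabled for Black-style formatting, which matches the double-quote and trailing-comma rewrites throughout this PR. A standalone sketch (not code from this repo) of lines that are normal formatter output yet trip exactly those two rules:

    jam = list(range(10))
    lower, upper, offset = 1, 5, 2
    ham = jam[lower + offset : upper + offset]  # E203: whitespace before ':'

    first_value, second_value = 1, 2
    total = (
        first_value
        + second_value  # W503: line break before binary operator
    )
    print(ham, total)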
4 changes: 2 additions & 2 deletions .github/workflows/theHarvester.yml
@@ -34,9 +34,9 @@ jobs:
       - name: Lint with flake8
         run: |
           # stop the build if there are Python syntax errors or undefined names
-          flake8 . --count --show-source --statistics
+          flake8 . --count --show-source --statistics --config .flake8
           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-          flake8 . --count --exit-zero --max-line-length=127 --statistics
+          flake8 . --count --exit-zero --max-line-length=127 --statistics --config .flake8
       - name: Test with pytest
         run: |
22 changes: 12 additions & 10 deletions theHarvester/__main__.py
@@ -176,7 +176,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
     print(f'\033[94m[*] Searching {source[0].upper() + source[1:]}. ')

     if store_host:
-        host_names = {host for host in filter(await search_engine.get_hostnames()) if f'.{word}' in host}
+        host_names = list({host for host in filter(await search_engine.get_hostnames()) if f'.{word}' in host})
         host_names = list(host_names)
         if source != 'hackertarget' and source != 'pentesttools' and source != 'rapiddns':
             # If a source is inside this conditional, it means the hosts returned must be resolved to obtain ip
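My reading of why this matters for the MyPy failures named in the PR title: the old code bound host_names to a set and then rebound it to a list, which mypy rejects as an incompatible assignment. Building the list in one expression keeps the name at a single type, and the unchanged host_names = list(host_names) line becomes a harmless list-to-list pass. A minimal reproduction with hypothetical values:

    hosts = {"a.example.com", "b.example.com"}
    hosts = list(hosts)  # mypy: Incompatible types in assignment
                         # (expression has type "list[str]", variable has type "set[str]")

    hosts_ok = list({"a.example.com", "b.example.com"})
    hosts_ok = list(hosts_ok)  # fine: list -> list, one type throughout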
@@ -574,10 +574,13 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
         if isinstance(e, MissingKey):
             print(e)
         else:
-            try:
-                # Check if dns_brute is defined
-                rest_args.dns_brute
-            except Exception:
+            if rest_args is not None:
+                try:
+                    rest_args.dns_brute
+                except Exception:
+                    print('\n[!] Invalid source.\n')
+                    sys.exit(1)
+            else:
                 print('\n[!] Invalid source.\n')
                 sys.exit(1)
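Same theme, as I read it: rest_args is presumably typed Optional, and mypy flags attribute access on a possibly-None value, hence the explicit None guard before probing rest_args.dns_brute. A sketch of the narrowing pattern with stand-in names:

    from argparse import Namespace
    from typing import Optional

    rest_args: Optional[Namespace] = None  # hypothetical stand-in

    if rest_args is not None:
        # mypy narrows rest_args to Namespace inside this branch,
        # so the attribute probe type-checks
        try:
            rest_args.dns_brute
        except AttributeError:
            print('\n[!] Invalid source.\n')
    else:
        print('\n[!] Invalid source.\n')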

@@ -848,7 +851,6 @@ async def handler(lst):
         print('\n[*] Virtual hosts:')
         print('------------------')
         for data in host_ip:
-            from theHarvester.discovery import bingsearch
             basic_search = bingsearch.SearchBing(data, limit, start)
             await basic_search.process_vhost()
             results = await basic_search.get_allhostnames()
@@ -890,19 +892,19 @@ async def handler(lst):
         async with Pool(10) as pool:
             results = await pool.map(screen_shotter.visit, list(unique_resolved_domains))
             # Filter out domains that we couldn't connect to
-            unique_resolved_domains = list(sorted({tup[0] for tup in results if len(tup[1]) > 0}))
+            unique_resolved_domains_list = list(sorted({tup[0] for tup in results if len(tup[1]) > 0}))
         async with Pool(3) as pool:
-            print(f'Length of unique resolved domains: {len(unique_resolved_domains)} chunking now!\n')
+            print(f'Length of unique resolved domains: {len(unique_resolved_domains_list)} chunking now!\n')
             # If you have the resources, you could make the function faster by increasing the chunk number
             chunk_number = 14
-            for chunk in screen_shotter.chunk_list(unique_resolved_domains, chunk_number):
+            for chunk in screen_shotter.chunk_list(unique_resolved_domains_list, chunk_number):
                 try:
                     screenshot_tups.extend(await pool.map(screen_shotter.take_screenshot, chunk))
                 except Exception as ee:
                     print(f'An exception has occurred while mapping: {ee}')
         end = time.perf_counter()
         # There is probably an easier way to do this
-        total = end - start_time
+        total = int(end - start_time)
         mon, sec = divmod(total, 60)
         hr, mon = divmod(mon, 60)
         total_time = "%02d:%02d" % (mon, sec)
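Two small fixes in this hunk, on my reading: the filtered screenshot results get a fresh name (unique_resolved_domains_list) instead of rebinding the existing set-typed variable — the same one-type-per-name rule as above — and the elapsed time is truncated to int so divmod and the "%02d" formatting work in whole seconds. A standalone sketch of the timing part:

    import time

    start_time = time.perf_counter()
    time.sleep(0.1)  # stand-in for the real screenshot work
    end = time.perf_counter()

    total = int(end - start_time)  # whole seconds, so divmod yields ints
    mon, sec = divmod(total, 60)
    hr, mon = divmod(mon, 60)
    print("%02d:%02d" % (mon, sec))  # hours computed but left unformatted, as upstream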
76 changes: 46 additions & 30 deletions theHarvester/discovery/bingsearch.py
@@ -4,56 +4,72 @@


 class SearchBing:
-
     def __init__(self, word, limit, start) -> None:
-        self.word = word.replace(' ', '%20')
-        self.results = ""
+        self.word = word.replace(" ", "%20")
+        self.results: tuple[Any, ...] = ()
         self.total_results = ""
-        self.server = 'www.bing.com'
-        self.apiserver = 'api.search.live.net'
-        self.hostname = 'www.bing.com'
+        self.server = "www.bing.com"
+        self.apiserver = "api.search.live.net"
+        self.hostname = "www.bing.com"
         self.limit = int(limit)
         self.bingApi = Core.bing_key()
         self.counter = start
         self.proxy = False

     async def do_search(self) -> None:
         headers = {
-            'Host': self.hostname,
-            'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',
-            'Accept-Language': 'en-us,en',
-            'User-agent': Core.get_user_agent()
+            "Host": self.hostname,
+            "Cookie": "SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50",
+            "Accept-Language": "en-us,en",
+            "User-agent": Core.get_user_agent(),
         }
         base_url = f'https://{self.server}/search?q=%40"{self.word}"&count=50&first=xx'
-        urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
-        responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
+        urls = [
+            base_url.replace("xx", str(num))
+            for num in range(0, self.limit, 50)
+            if num <= self.limit
+        ]
+        responses = await AsyncFetcher.fetch_all(
+            urls, headers=headers, proxy=self.proxy
+        )
         for response in responses:
             self.total_results += response

     async def do_search_api(self) -> None:
-        url = 'https://api.bing.microsoft.com/v7.0/search?'
+        url = "https://api.bing.microsoft.com/v7.0/search?"
         params = {
-            'q': self.word,
-            'count': str(self.limit),
-            'offset': '0',
-            'mkt': 'en-us',
-            'safesearch': 'Off'
+            "q": self.word,
+            "count": str(self.limit),
+            "offset": "0",
+            "mkt": "en-us",
+            "safesearch": "Off",
         }
-        headers = {'User-Agent': Core.get_user_agent(), 'Ocp-Apim-Subscription-Key': self.bingApi}
-        self.results = await AsyncFetcher.fetch_all([url], headers=headers, params=params, proxy=self.proxy)
+        headers = {
+            "User-Agent": Core.get_user_agent(),
+            "Ocp-Apim-Subscription-Key": self.bingApi,
+        }
+        self.results = await AsyncFetcher.fetch_all(
+            [url], headers=headers, params=params, proxy=self.proxy
+        )
         for res in self.results:
             self.total_results += res

     async def do_search_vhost(self) -> None:
         headers = {
-            'Host': self.hostname,
-            'Cookie': 'mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50',
-            'Accept-Language': 'en-us,en',
-            'User-agent': Core.get_user_agent()
+            "Host": self.hostname,
+            "Cookie": "mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50",
+            "Accept-Language": "en-us,en",
+            "User-agent": Core.get_user_agent(),
         }
-        base_url = f'http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx'
-        urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
-        responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
+        base_url = f"http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx"
+        urls = [
+            base_url.replace("xx", str(num))
+            for num in range(0, self.limit, 50)
+            if num <= self.limit
+        ]
+        responses = await AsyncFetcher.fetch_all(
+            urls, headers=headers, proxy=self.proxy
+        )
         for response in responses:
             self.total_results += response

@@ -71,13 +87,13 @@ async def get_allhostnames(self):

     async def process(self, api, proxy: bool = False) -> None:
         self.proxy = proxy
-        if api == 'yes':
+        if api == "yes":
             if self.bingApi is None:
-                raise MissingKey('BingAPI')
+                raise MissingKey("BingAPI")
             await self.do_search_api()
         else:
             await self.do_search()
-        print(f'\tSearching {self.counter} results.')
+        print(f"\tSearching {self.counter} results.")

     async def process_vhost(self) -> None:
         await self.do_search_vhost()
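One observation on the reformatted comprehensions (not something this PR changes): range(0, self.limit, 50) already stops below the limit, so the "if num <= self.limit" filter never rejects anything. The pagination pattern, stripped to a runnable sketch:

    limit = 120
    base_url = "https://www.bing.com/search?q=test&count=50&first=xx"

    # range() steps by the page size, so every num is already <= limit
    # and the guard in the original comprehension is redundant
    urls = [base_url.replace("xx", str(num)) for num in range(0, limit, 50)]
    print(urls)  # offsets 0, 50, 100 -> three result pages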
16 changes: 9 additions & 7 deletions theHarvester/discovery/censysearch.py
@@ -1,13 +1,15 @@
-from typing import Set
-from theHarvester.discovery.constants import MissingKey
-from theHarvester.lib.core import Core
-from theHarvester.lib.version import version as thehavester_version
-from censys.search import CensysCertificates
+
 from censys.common import __version__
 from censys.common.exceptions import (
     CensysRateLimitExceededException,
     CensysUnauthorizedException,
 )
+from censys.search import CensysCerts
+
+from theHarvester.discovery.constants import MissingKey
+from theHarvester.lib.core import Core
+from theHarvester.lib.version import version as thehavester_version


 class SearchCensys:
@@ -23,13 +25,13 @@ def __init__(self, domain, limit: int = 500) -> None:

     async def do_search(self) -> None:
         try:
-            cert_search = CensysCertificates(
+            cert_search = CensysCerts(
                 api_id=self.key[0],
                 api_secret=self.key[1],
                 user_agent=f"censys/{__version__} (theHarvester/{thehavester_version}); +https://github.com/laramies/theHarvester)",
             )
         except CensysUnauthorizedException:
-            raise MissingKey('Censys ID and/or Secret')
+            raise MissingKey("Censys ID and/or Secret")

         query = f"parsed.names: {self.word}"
         try:
@@ -38,7 +40,7 @@ async def do_search(self) -> None:
                 fields=["parsed.names", "metadata", "parsed.subject.email_address"],
                 max_records=self.limit,
             )
-            for cert in response:
+            for cert in response():
                 self.totalhosts.update(cert.get("parsed.names", []))
                 self.emails.update(cert.get("parsed.subject.email_address", []))
         except CensysRateLimitExceededException:
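Context for the API swap, as I read it (the diff shows only the call sites): censys-python v2 replaced CensysCertificates with CensysCerts, and search() now returns a query handle rather than a plain list of records, which is why the loop becomes "for cert in response():". A hedged sketch of the v2-style usage mirroring the arguments above; not runnable without real Censys credentials:

    from censys.search import CensysCerts

    client = CensysCerts(api_id="...", api_secret="...")
    response = client.search(
        "parsed.names: example.com",
        fields=["parsed.names", "parsed.subject.email_address"],
        max_records=100,
    )

    hosts = set()
    for cert in response():  # calling the handle yields individual records
        hosts.update(cert.get("parsed.names", []))
    print(sorted(hosts))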
77 changes: 50 additions & 27 deletions theHarvester/discovery/constants.py
@@ -1,6 +1,7 @@
-from theHarvester.lib.core import *
-from typing import Union, Optional
 import random
+from typing import Optional, Union
+
+from theHarvester.lib.core import *


 async def splitter(links):
@@ -41,15 +42,19 @@ def filter(lst):
     new_lst = []
     for item in lst:
         item = str(item)
-        if (item[0].isalpha() or item[0].isdigit()) and ('xxx' not in item) and ('..' not in item):
-            item = item.replace('252f', '').replace('2F', '').replace('2f', '')
+        if (
+            (item[0].isalpha() or item[0].isdigit())
+            and ("xxx" not in item)
+            and (".." not in item)
+        ):
+            item = item.replace("252f", "").replace("2F", "").replace("2f", "")
             new_lst.append(item.lower())
     return new_lst


 def get_delay() -> float:
     """Method that is used to generate a random delay"""
-    return random.randint(1, 3) - .5
+    return random.randint(1, 3) - 0.5


 async def search(text: str) -> bool:
@@ -58,8 +63,12 @@ async def search(text: str) -> bool:
     :return bool:
     """
     for line in text.strip().splitlines():
-        if 'This page appears when Google automatically detects requests coming from your computer network' in line \
-                or 'http://www.google.com/sorry/index' in line or 'https://www.google.com/sorry/index' in line:
+        if (
+            "This page appears when Google automatically detects requests coming from your computer network"
+            in line
+            or "http://www.google.com/sorry/index" in line
+            or "https://www.google.com/sorry/index" in line
+        ):
             # print('\tGoogle is blocking your IP due to too many automated requests, wait or change your IP')
             return True
     return False
@@ -71,46 +80,60 @@ async def google_workaround(visit_url: str) -> Union[bool, str]:
     :param visit_url: Url to scrape
     :return: Correct html that can be parsed by BS4
     """
-    url = 'https://websniffer.cc/'
+    url = "https://websniffer.cc/"
     data = {
-        'Cookie': '',
-        'url': visit_url,
-        'submit': 'Submit',
-        'type': 'GET&http=1.1',
-        'uak': str(random.randint(4, 8))  # select random UA to send to Google
+        "Cookie": "",
+        "url": visit_url,
+        "submit": "Submit",
+        "type": "GET&http=1.1",
+        "uak": str(random.randint(4, 8)),  # select random UA to send to Google
     }
-    returned_html = await AsyncFetcher.post_fetch(url, headers={'User-Agent': Core.get_user_agent()}, data=data)
-    returned_html = "This page appears when Google automatically detects requests coming from your computer network" \
-        if returned_html == "" else returned_html[0]
+    returned_html = await AsyncFetcher.post_fetch(
+        url, headers={"User-Agent": Core.get_user_agent()}, data=data
+    )
+    returned_html = (
+        "This page appears when Google automatically detects requests coming from your computer network"
+        if returned_html == ""
+        else returned_html[0]
+    )

-    returned_html = "" if 'Please Wait... | Cloudflare' in returned_html else returned_html
+    returned_html = (
+        "" if "Please Wait... | Cloudflare" in returned_html else returned_html
+    )

-    if len(returned_html) == 0 or await search(returned_html) or '&lt;html' not in returned_html:
+    if (
+        len(returned_html) == 0
+        or await search(returned_html)
+        or "&lt;html" not in returned_html
+    ):
         # indicates that google is serving workaround a captcha
         # That means we will try out second option which will utilize proxies
         return True
     # the html we get is malformed for BS4 as there are no greater than or less than signs
-    if '&lt;html&gt;' in returned_html:
-        start_index = returned_html.index('&lt;html&gt;')
+    if "&lt;html&gt;" in returned_html:
+        start_index = returned_html.index("&lt;html&gt;")
     else:
-        start_index = returned_html.index('&lt;html')
+        start_index = returned_html.index("&lt;html")

-    end_index = returned_html.index('&lt;/html&gt;') + 1
+    end_index = returned_html.index("&lt;/html&gt;") + 1
     correct_html = returned_html[start_index:end_index]
     # Slice list to get the response's html
-    correct_html = ''.join([ch.strip().replace('&lt;', '<').replace('&gt;', '>') for ch in correct_html])
+    correct_html = "".join(
+        [ch.strip().replace("&lt;", "<").replace("&gt;", ">") for ch in correct_html]
+    )
     return correct_html


 class MissingKey(Exception):
     """
     :raise: When there is a module that has not been provided its API key
     """
+
     def __init__(self, source: Optional[str]) -> None:
         if source:
-            self.message = f'\n\033[93m[!] Missing API key for {source}. \033[0m'
+            self.message = f"\n\033[93m[!] Missing API key for {source}. \033[0m"
         else:
-            self.message = '\n\033[93m[!] Missing CSE id. \033[0m'
+            self.message = "\n\033[93m[!] Missing CSE id. \033[0m"

     def __str__(self) -> str:
         return self.message
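A side note on google_workaround's entity handling (unchanged here beyond formatting): the manual '&lt;'/'&gt;' replaces could alternatively go through the standard library, at the cost of unescaping every entity rather than just angle brackets. A sketch, assuming that broader unescaping is acceptable:

    import html

    returned = "&lt;html&gt;&lt;body&gt;hi&lt;/body&gt;&lt;/html&gt;"
    print(html.unescape(returned))  # <html><body>hi</body></html>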