Fixed VirusTotal module (#1172)
* Added a call to the API endpoint to explicitly gather subdomains from ZoomEye, updated user agents, replaced orjson with ujson, and fixed a 'substring not found' error.

* Updated orjson to ujson.

* Fixed a semantic error in the HTML check in the Google workaround.

* Fixed flake8 errors.

* Fixed VT to use API.

* Fixed virustotal module.

* Fixed an edge case that could cause an infinite loop.
NotoriousRebel authored Aug 6, 2022
1 parent 121e23b commit c801db6
Showing 5 changed files with 75 additions and 27 deletions.
2 changes: 1 addition & 1 deletion requirements/base.txt
@@ -19,4 +19,4 @@ setuptools==63.4.1
shodan==1.28.0
slowapi==0.1.5
uvicorn==0.18.2
uvloop==0.16.0; platform_system != "Windows"
uvloop==0.16.0; platform_system != "Windows"
2 changes: 1 addition & 1 deletion theHarvester.py
@@ -25,4 +25,4 @@

            # As we are not using Windows we can change the spawn method to fork for greater performance
            aiomultiprocess.set_context("fork")
    asyncio.run(__main__.entry_point())
    asyncio.run(__main__.entry_point())
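
For context, the idea behind the comment above as a minimal standalone sketch (not part of this commit): the fork start method is only usable on POSIX platforms, so the call is guarded by platform. The sys.platform check here is illustrative; the repository's own gating may differ.

import sys

import aiomultiprocess

# Illustrative guard: only request the cheaper fork start method off Windows.
if sys.platform != "win32":
    # fork reuses the already-initialised parent interpreter instead of spawning a fresh one
    aiomultiprocess.set_context("fork")
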
14 changes: 1 addition & 13 deletions theHarvester/__main__.py
@@ -154,13 +154,11 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
        if store_people:
            people_list = await search_engine.get_people()
            await db_stash.store_all(word, people_list, 'people', source)

        if store_links:
            links = await search_engine.get_links()
            linkedin_links_tracker.extend(links)
            if len(links) > 0:
                await db.store_all(word, links, 'linkedinlinks', engineitem)

        if store_interestingurls:
            iurls = await search_engine.get_interestingurls()
            interesting_urls.extend(iurls)
@@ -286,8 +284,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
                    stor_lst.append(store(github_search, engineitem, store_host=True, store_emails=True))
                except MissingKey as ex:
                    print(ex)
                else:
                    pass

            elif engineitem == 'hackertarget':
                from theHarvester.discovery import hackertarget
@@ -303,8 +299,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
                except Exception as e:
                    if isinstance(e, MissingKey):
                        print(e)
                    else:
                        pass

            elif engineitem == 'intelx':
                from theHarvester.discovery import intelxsearch
@@ -388,8 +382,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
                except Exception as e:
                    if isinstance(e, MissingKey):
                        print(e)
                    else:
                        pass

            elif engineitem == 'sublist3r':
                from theHarvester.discovery import sublist3r
@@ -432,8 +424,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
                except Exception as e:
                    if isinstance(e, MissingKey):
                        print(e)
                    else:
                        pass

            elif engineitem == 'yahoo':
                from theHarvester.discovery import yahoosearch
@@ -449,8 +439,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
                except Exception as e:
                    if isinstance(e, MissingKey):
                        print(e)
                    else:
                        pass
    else:
        try:
            # Check if dns_brute is defined
@@ -836,4 +824,4 @@ async def entry_point():
        print('\n\n[!] ctrl+c detected from user, quitting.\n\n ')
    except Exception as error_entry_point:
        print(error_entry_point)
        sys.exit(1)
        sys.exit(1)
82 changes: 71 additions & 11 deletions theHarvester/discovery/virustotal.py
@@ -1,28 +1,88 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from pprint import pprint


class SearchVirustotal:

    def __init__(self, word):
        self.word = word
        self.key = Core.virustotal_key()
        if self.key is None:
            raise MissingKey('virustotal')
        self.totalhosts = set
        self.word = word
        self.proxy = False
        self.hostnames = []

    async def do_search(self):
        url = f'https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40'
        response = await AsyncFetcher.fetch_all([url], json=True, headers={'User-Agent': Core.get_user_agent(),
                                                                           'X-APIKEY': self.key},
                                                proxy=self.proxy)
        entry = [host for host in response]
        pprint(entry.items())
        # TODO determine if more endpoints can yield useful info given a domain
        # based on: https://developers.virustotal.com/reference/domains-relationships
        # base_url = "https://www.virustotal.com/api/v3/domains/domain/subdomains?limit=40"
        headers = {
            'User-Agent': Core.get_user_agent(),
            "Accept": "application/json",
            "x-apikey": self.key
        }
        base_url = f"https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40"
        cursor = ''
        count = 0
        fail_counter = 0
        counter = 0
        breakcon = False
        while True:
            if breakcon:
                break
            # rate limit is 4 per minute
            # TODO add timer logic if proven to be needed
            # in the meantime sleeping 16 seconds should eliminate hitting the rate limit
            # in case rate limit is hit, fail counter exists and sleep for 65 seconds
            send_url = base_url + "&cursor=" + cursor if cursor != '' and len(cursor) > 2 else base_url
            responses = await AsyncFetcher.fetch_all([send_url], headers=headers, proxy=self.proxy, json=True)
            jdata = responses[0]
            if 'data' not in jdata.keys():
                await asyncio.sleep(60 + 5)
                fail_counter += 1
            if 'meta' in jdata.keys():
                cursor = jdata['meta']['cursor'] if 'cursor' in jdata['meta'].keys() else ''
                if len(cursor) == 0 and 'data' in jdata.keys():
                    # if cursor no longer is within the meta field have hit last entry
                    breakcon = True
                count += jdata['meta']['count']
            if count == 0 or fail_counter >= 2:
                break
            if 'data' in jdata.keys():
                data = jdata['data']
                self.hostnames.extend(await self.parse_hostnames(data, self.word))
                counter += 1
            await asyncio.sleep(16)
        self.hostnames = list(sorted(set(self.hostnames)))
        # verify domains such as x.x.com.multicdn.x.com are parsed properly
        self.hostnames = [host for host in self.hostnames if ((len(host.split('.')) >= 3)
                          and host.split('.')[-2] == self.word.split('.')[-2])]

    async def get_hostnames(self) -> list:
        return self.hostnames

    # async def get_hostnames(self) -> set:
    #     return self.total_results
    @staticmethod
    async def parse_hostnames(data, word):
        total_subdomains = set()
        for attribute in data:
            total_subdomains.add(attribute['id'].replace('"', '').replace('www.', ''))
            attributes = attribute['attributes']
            total_subdomains.update(
                {value['value'].replace('"', '').replace('www.', '') for value in attributes['last_dns_records'] if
                 word in value['value']})
            if 'last_https_certificate' in attributes.keys():
                total_subdomains.update({value.replace('"', '').replace('www.', '') for value in
                                         attributes['last_https_certificate']['extensions']['subject_alternative_name']
                                         if word in value})
        total_subdomains = list(sorted(total_subdomains))
        # Other false positives may occur over time and yes there are other ways to parse this, feel free to implement
        # them and submit a PR or raise an issue if you run into this filtering not being enough
        # TODO determine if parsing 'v=spf1 include:_spf-x.acme.com include:_spf-x.acme.com' is worth parsing
        total_subdomains = [x for x in total_subdomains if
                            not str(x).endswith('edgekey.net') and not str(x).endswith('akadns.net')
                            and 'include:_spf' not in str(x)]
        total_subdomains.sort()
        return total_subdomains

    async def process(self, proxy=False):
        self.proxy = proxy
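
For orientation, a hedged usage sketch of the reworked module (illustrative, not shipped with theHarvester). It assumes a VirusTotal API key is configured for theHarvester, that MissingKey lives in theHarvester.discovery.constants, and that process() goes on to call do_search() (its tail is truncated in the diff above). Because of the 4-requests-per-minute rate limit handled above, each page of up to 40 subdomains costs roughly 16 seconds.

import asyncio

from theHarvester.discovery import virustotal
from theHarvester.discovery.constants import MissingKey


async def main() -> None:
    try:
        # raises MissingKey when no VirusTotal API key is configured
        search = virustotal.SearchVirustotal('example.com')
    except MissingKey as err:
        print(err)
        return
    # expected to paginate the v3 /domains/{domain}/subdomains endpoint via do_search()
    await search.process(proxy=False)
    for host in await search.get_hostnames():
        # hostnames come back deduplicated, sorted, and filtered to the target domain
        print(host)


asyncio.run(main())

Note that the final filter in do_search() keeps only hostnames whose second-to-last label matches the target's (for example.com, cdn.images.example.com passes while example.com.badsite.net does not), and parse_hostnames() additionally drops edgekey.net/akadns.net CDN names and SPF include: strings.
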
2 changes: 1 addition & 1 deletion theHarvester/lib/core.py
@@ -357,4 +357,4 @@ async def fetch_all(cls, urls, headers='', params='', json=False, takeover=False
                return texts
            else:
                texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
                return texts
                return texts
