Fixed VirusTotal module #1172

Merged: 10 commits, Aug 6, 2022
2 changes: 1 addition & 1 deletion requirements/base.txt
@@ -19,4 +19,4 @@ setuptools==63.4.1
shodan==1.28.0
slowapi==0.1.5
uvicorn==0.18.2
-uvloop==0.16.0; platform_system != "Windows"
+uvloop==0.16.0; platform_system != "Windows"
2 changes: 1 addition & 1 deletion theHarvester.py
@@ -25,4 +25,4 @@

# As we are not using Windows we can change the spawn method to fork for greater performance
aiomultiprocess.set_context("fork")
-asyncio.run(__main__.entry_point())
+asyncio.run(__main__.entry_point())
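
A note on the fork line kept above: aiomultiprocess.set_context selects one of the standard multiprocessing start methods, and "fork" only exists on POSIX systems. A minimal sketch of a portable guard, assuming one wanted to keep Windows working (the sys.platform check is illustrative, not part of this PR):

import sys

import aiomultiprocess

# "fork" is POSIX-only; on Windows keep the default start method ("spawn").
if sys.platform != "win32":
    aiomultiprocess.set_context("fork")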
14 changes: 1 addition & 13 deletions theHarvester/__main__.py
@@ -154,13 +154,11 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
    if store_people:
        people_list = await search_engine.get_people()
        await db_stash.store_all(word, people_list, 'people', source)
-
    if store_links:
        links = await search_engine.get_links()
        linkedin_links_tracker.extend(links)
        if len(links) > 0:
            await db.store_all(word, links, 'linkedinlinks', engineitem)
-
    if store_interestingurls:
        iurls = await search_engine.get_interestingurls()
        interesting_urls.extend(iurls)
@@ -286,8 +284,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
            stor_lst.append(store(github_search, engineitem, store_host=True, store_emails=True))
        except MissingKey as ex:
            print(ex)
-        else:
-            pass

    elif engineitem == 'hackertarget':
        from theHarvester.discovery import hackertarget
@@ -303,8 +299,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
        except Exception as e:
            if isinstance(e, MissingKey):
                print(e)
-            else:
-                pass

    elif engineitem == 'intelx':
        from theHarvester.discovery import intelxsearch
@@ -388,8 +382,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
        except Exception as e:
            if isinstance(e, MissingKey):
                print(e)
-            else:
-                pass

    elif engineitem == 'sublist3r':
        from theHarvester.discovery import sublist3r
@@ -432,8 +424,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
        except Exception as e:
            if isinstance(e, MissingKey):
                print(e)
-            else:
-                pass

    elif engineitem == 'yahoo':
        from theHarvester.discovery import yahoosearch
@@ -449,8 +439,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
        except Exception as e:
            if isinstance(e, MissingKey):
                print(e)
-            else:
-                pass
    else:
        try:
            # Check if dns_brute is defined
@@ -836,4 +824,4 @@ async def entry_point():
        print('\n\n[!] ctrl+c detected from user, quitting.\n\n ')
    except Exception as error_entry_point:
        print(error_entry_point)
-        sys.exit(1)
+        sys.exit(1)
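
Side note on the deleted else: pass branches: they were no-ops, so removing them changes nothing. The surviving pattern of catching Exception and filtering with isinstance(e, MissingKey) could also be narrowed at the except itself; a sketch of the equivalent shape (illustrative only, not a change this PR makes):

try:
    stor_lst.append(store(search_engine, engineitem, store_host=True))
except MissingKey as ex:
    # Only missing-API-key errors are reported; anything else propagates.
    print(ex)

One behavioral difference worth noting: the broad except Exception version silently swallows non-MissingKey errors, while the narrower form lets them surface.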
82 changes: 71 additions & 11 deletions theHarvester/discovery/virustotal.py
@@ -1,28 +1,88 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
-from pprint import pprint


class SearchVirustotal:

    def __init__(self, word):
-        self.word = word
        self.key = Core.virustotal_key()
        if self.key is None:
            raise MissingKey('virustotal')
-        self.totalhosts = set
+        self.word = word
        self.proxy = False
+        self.hostnames = []
    async def do_search(self):
-        url = f'https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40'
-        response = await AsyncFetcher.fetch_all([url], json=True, headers={'User-Agent': Core.get_user_agent(),
-                                                                           'X-APIKEY': self.key},
-                                                proxy=self.proxy)
-        entry = [host for host in response]
-        pprint(entry.items())
+        # TODO determine if more endpoints can yield useful info given a domain
+        # based on: https://developers.virustotal.com/reference/domains-relationships
+        # base_url = "https://www.virustotal.com/api/v3/domains/domain/subdomains?limit=40"
+        headers = {
+            'User-Agent': Core.get_user_agent(),
+            "Accept": "application/json",
+            "x-apikey": self.key
+        }
+        base_url = f"https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40"
+        cursor = ''
+        count = 0
+        fail_counter = 0
+        counter = 0
+        breakcon = False
+        while True:
+            if breakcon:
+                break
+            # rate limit is 4 per minute
+            # TODO add timer logic if proven to be needed
+            # in the meantime sleeping 16 seconds should eliminate hitting the rate limit
+            # in case rate limit is hit, fail counter exists and sleep for 65 seconds
+            send_url = base_url + "&cursor=" + cursor if cursor != '' and len(cursor) > 2 else base_url
+            responses = await AsyncFetcher.fetch_all([send_url], headers=headers, proxy=self.proxy, json=True)
+            jdata = responses[0]
+            if 'data' not in jdata.keys():
+                await asyncio.sleep(60 + 5)
+                fail_counter += 1
+            if 'meta' in jdata.keys():
+                cursor = jdata['meta']['cursor'] if 'cursor' in jdata['meta'].keys() else ''
+                if len(cursor) == 0 and 'data' in jdata.keys():
+                    # if cursor no longer is within the meta field have hit last entry
+                    breakcon = True
+                count += jdata['meta']['count']
+            if count == 0 or fail_counter >= 2:
+                break
+            if 'data' in jdata.keys():
+                data = jdata['data']
+                self.hostnames.extend(await self.parse_hostnames(data, self.word))
+            counter += 1
+            await asyncio.sleep(16)
+        self.hostnames = list(sorted(set(self.hostnames)))
+        # verify domains such as x.x.com.multicdn.x.com are parsed properly
+        self.hostnames = [host for host in self.hostnames if ((len(host.split('.')) >= 3)
+                                                              and host.split('.')[-2] == self.word.split('.')[-2])]
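
The TODO above mentions timer logic as an alternative to the fixed 16-second sleep. A minimal sketch of what that pacing could look like against the public API's 4-requests-per-minute quota (the RateLimiter class is hypothetical, not part of this PR):

import asyncio
import time


class RateLimiter:
    """Allow at most `calls` requests per `period` seconds by spacing them evenly."""

    def __init__(self, calls: int = 4, period: float = 60.0) -> None:
        self.interval = period / calls
        self.last_call = 0.0

    async def wait(self) -> None:
        # Sleep just long enough to keep requests `interval` seconds apart.
        delay = self.interval - (time.monotonic() - self.last_call)
        if delay > 0:
            await asyncio.sleep(delay)
        self.last_call = time.monotonic()

An await limiter.wait() before each fetch would then replace the unconditional asyncio.sleep(16).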

+    async def get_hostnames(self) -> list:
+        return self.hostnames

-    # async def get_hostnames(self) -> set:
-    # return self.total_results
+    @staticmethod
+    async def parse_hostnames(data, word):
+        total_subdomains = set()
+        for attribute in data:
+            total_subdomains.add(attribute['id'].replace('"', '').replace('www.', ''))
+            attributes = attribute['attributes']
+            total_subdomains.update(
+                {value['value'].replace('"', '').replace('www.', '') for value in attributes['last_dns_records'] if
+                 word in value['value']})
+            if 'last_https_certificate' in attributes.keys():
+                total_subdomains.update({value.replace('"', '').replace('www.', '') for value in
+                                         attributes['last_https_certificate']['extensions']['subject_alternative_name']
+                                         if word in value})
+        total_subdomains = list(sorted(total_subdomains))
+        # Other false positives may occur over time and yes there are other ways to parse this, feel free to implement
+        # them and submit a PR or raise an issue if you run into this filtering not being enough
+        # TODO determine if parsing 'v=spf1 include:_spf-x.acme.com include:_spf-x.acme.com' is worth parsing
+        total_subdomains = [x for x in total_subdomains if
+                            not str(x).endswith('edgekey.net') and not str(x).endswith('akadns.net')
+                            and 'include:_spf' not in str(x)]
+        total_subdomains.sort()
+        return total_subdomains

Check failure (Code scanning / CodeQL): Incomplete URL substring sanitization. 'edgekey.net' may be at an arbitrary position in the sanitized URL.
Check failure (Code scanning / CodeQL): Incomplete URL substring sanitization. 'akadns.net' may be at an arbitrary position in the sanitized URL.
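
One way to address the CodeQL findings is to anchor the comparison at a label boundary instead of a bare suffix test, so that a host that merely ends with the characters 'edgekey.net' (e.g. 'xedgekey.net') is not conflated with a genuine 'edgekey.net' subdomain. A sketch (the helper name is hypothetical):

def has_domain_suffix(host: str, suffix: str) -> bool:
    # Accept only whole-label matches: 'cdn.edgekey.net' yes, 'xedgekey.net' no.
    host = host.lower().rstrip('.')
    return host == suffix or host.endswith('.' + suffix)

The filter would then read not has_domain_suffix(x, 'edgekey.net') and not has_domain_suffix(x, 'akadns.net').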

    async def process(self, proxy=False):
        self.proxy = proxy
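
For reference, the pagination contract do_search leans on: each page of GET /api/v3/domains/{domain}/subdomains carries a meta.cursor token until the last page, and the next request passes it back as the cursor query parameter. A standalone sketch of the same walk, with aiohttp standing in for theHarvester's AsyncFetcher (illustrative only; no rate limiting shown):

import aiohttp


async def walk_subdomains(domain: str, api_key: str) -> list:
    base_url = f"https://www.virustotal.com/api/v3/domains/{domain}/subdomains?limit=40"
    headers = {"Accept": "application/json", "x-apikey": api_key}
    found, cursor = [], ""
    async with aiohttp.ClientSession(headers=headers) as session:
        while True:
            url = base_url + ("&cursor=" + cursor if cursor else "")
            async with session.get(url) as resp:
                jdata = await resp.json()
            # Each element of 'data' is a domain object whose 'id' is the hostname.
            found.extend(item["id"] for item in jdata.get("data", []))
            cursor = jdata.get("meta", {}).get("cursor", "")
            if not cursor:
                return found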
2 changes: 1 addition & 1 deletion theHarvester/lib/core.py
@@ -357,4 +357,4 @@ async def fetch_all(cls, urls, headers='', params='', json=False, takeover=False
            return texts
        else:
            texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
-            return texts
+            return texts
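
A property the new VirusTotal code quietly relies on: asyncio.gather returns results in the order the awaitables were passed, not in completion order, which is why do_search can take responses[0] for its single URL. A quick illustration:

import asyncio


async def echo(x: str, delay: float) -> str:
    await asyncio.sleep(delay)
    return x


async def main() -> None:
    # 'a' finishes last but still comes back first: gather preserves input order.
    results = await asyncio.gather(echo('a', 0.2), echo('b', 0.1), echo('c', 0.0))
    assert results == ['a', 'b', 'c']

asyncio.run(main())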