InSpy 3.0 commit

jobroche · Aug 14, 2018 · 161ca95 · 161ca95
1 parent 54a5398
commit 161ca95
Show file tree

Hide file tree

Showing 21 changed files with 332 additions and 508 deletions.
diff --git a/InSpy.py b/InSpy.py
@@ -1,125 +1,82 @@
 #!/usr/bin/env python2
-# Copyright (c) 2016 Jonathan Broche (@g0jhonny)
+# Copyright (c) 2018 Jonathan Broche (@LeapSecurity)
 
-from lib.logger import *
-from lib.soupify import *
+import argparse, sys, os
+from lib.http import *
 from lib.workbench import *
-from lib.crawler import *
-import os, argparse, sys, time
+from lib.soup import *
+from lib.export import *
+from lib.logger import *
 
-parser = argparse.ArgumentParser(description='InSpy - A LinkedIn enumeration tool by Jonathan Broche (@g0jhonny)', version="2.0.2")
-parser.add_argument('company', help="Company name to use for tasks.")    
-techgroup = parser.add_argument_group(title="Technology Search")
-techgroup.add_argument('--techspy', metavar='file', const="wordlists/tech-list-small.txt", nargs='?', help="Crawl LinkedIn job listings for technologies used by the company. Technologies imported from a new line delimited file. [Default: tech-list-small.txt]")
-techgroup.add_argument('--limit', metavar='int', type=int, default=50, help="Limit the number of job listings to crawl. [Default: 50]")
-empgroup = parser.add_argument_group(title="Employee Harvesting")
-empgroup.add_argument('--empspy', metavar='file', const="wordlists/title-list-small.txt", nargs='?', help="Discover employees by title and/or department. Titles and departments are imported from a new line delimited file. [Default: title-list-small.txt]")
-empgroup.add_argument('--emailformat', metavar='string', help="Create email addresses for discovered employees using a known format. [Accepted Formats: first.last@xyz.com, last.first@xyz.com, firstl@xyz.com, lfirst@xyz.com, flast@xyz.com, lastf@xyz.com, first@xyz.com, last@xyz.com]")
+
+parser = argparse.ArgumentParser(description='InSpy - A LinkedIn enumeration tool by Jonathan Broche (@LeapSecurity)', version="3.0.0")
+parser.add_argument('company', help="Company name to use for tasks.")
+parser.add_argument('--domain', help="Company domain to use for searching.")
+parser.add_argument('--email', help="Email format to create email addresses with. [Accepted Formats: first.last@xyz.com, last.first@xyz.com, firstl@xyz.com, lfirst@xyz.com, flast@xyz.com, lastf@xyz.com, first@xyz.com, last@xyz.com]")
+parser.add_argument('--titles', metavar='file', default="wordlists/title-list-small.txt", nargs='?', help="Discover employees by title and/or department. Titles and departments are imported from a new line delimited file. [Default: title-list-small.txt]")
 outgroup = parser.add_argument_group(title="Output Options")
 outgroup.add_argument('--html', metavar='file', help="Print results in HTML file.")
 outgroup.add_argument('--csv', metavar='file', help="Print results in CSV format.")
 outgroup.add_argument('--json', metavar='file', help="Print results in JSON.")
+outgroup.add_argument('--xml', metavar='file', help="Print results in XML.")
 
 if len(sys.argv) == 1:
     parser.print_help()
     sys.exit(1)
 
 args = parser.parse_args()
 start_logger(args.company)
+hunterapi = "" #insert hunterio api key here
 
-print "\nInSpy {}\n".format(parser.version)
-
-if not args.techspy and not args.empspy: 
-    print "You didn't provide any work for me to do."
-    sys.exit(1)
-
-stime = time.time()
-tech_html, employee_html, tech_csv, employee_csv, tech_json, employee_json = [], [], [], [], [], []
-
-if args.techspy:
-    if os.path.exists(os.path.abspath(args.techspy)):
-        initial_crawl = crawl_jobs(args.company)
-        if initial_crawl:
-            soup = soupify(initial_crawl)
-            job_links = []
-            for link in get_job_links(soup, args.company):
-                if len(job_links) < args.limit:
-                    job_links.append(link)
-            if len(job_links) != args.limit:
-                page_links = get_page_links(soup)
-                for page in range(len(page_links)):
-                    if len(job_links) == args.limit: break
-                    urlcrawl = crawl_url(page_links[page])
-                    if urlcrawl:                    
-                        for link in get_job_links(soupify(urlcrawl), args.company):
-                            if len(job_links) < args.limit:
-                                job_links.append(link)
+print "\nInSpy {}".format(parser.version)
 
-            pstatus("{} Jobs identified".format(len(job_links)))
-            if job_links:
-                techs = {}            
-                for job in range(len(job_links)):
-                    jobresponse = crawl_url(job_links[job])
-                    if jobresponse:
-                        jobsoup = soupify(jobresponse)
-                        description = get_job_description(jobsoup)
-                        matches = identify_tech(description, os.path.abspath(args.techspy))
-                        if matches:
-                            title = get_job_title(jobsoup)
-                            techs[title] = {job_links[job]:matches}
+if args.domain and not args.email: #search hunterio for email format
+	domain = args.domain
+	email = get_email_format(args.domain, hunterapi).replace("{", "").replace("}","")
+elif args.email and not args.domain: #search clearbit for domain
+	email = args.email
+	domain = get_domain(args.company)
+else: #no domain or email provided - fully automate it
+	domain = get_domain(args.company)
+	if domain:
+		email = get_email_format(domain, hunterapi)
+		if email: email = email.replace("{", "").replace("}","")
 
-                tech_html, tech_csv, tech_json = craft_tech(techs)
-    else:
-        perror("No such file or directory: '{}'".format(args.techspy))
+if domain and email:
+	print "\nDomain: {}, Email Format: {}\n".format(domain, email)
+	employees = {}
 
-if args.empspy:
-    if os.path.exists(os.path.abspath(args.empspy)):
-        employees = {}
-        emails = []
-        for response in crawl_employees(args.company, os.path.abspath(args.empspy)):
-            for name, title in get_employees(soupify(response)).items():
-                if args.company.lower() in title.lower():
-                    if not name in employees:
-                        employees[name] = title
+	if os.path.exists(os.path.abspath(args.titles)):
+		for response in search_linkedin(args.company, os.path.abspath(args.titles)):
+			for name, title in get_employees(soupify(response)).items():
+				if args.company.lower() in title.lower():
+					if not name in employees:
+						employees[name] = title
+		print "\n{} Employees identified".format(len(employees.keys()))
+	else:
+		print os.path.abspath(args.titles)
+		print "No such file or directory: '{}'".format(args.titles)
 
-        pstatus("{} Employees identified".format(len(employees.keys())))
-        if employees:
-            if args.emailformat:
-                if args.emailformat[:args.emailformat.find('@')] in ['first.last', 'last.first', 'firstlast', 'lastfirst', 'first_last', 'last_first', 'first', 'last', 'firstl', 'lfirst', 'flast', 'lastf']:
-                    employee_html, employee_csv, employee_json = craft_employees(employees, args.emailformat)
-                else:
-                    pwarning("You didn't provide a valid e-mail format. See help (-h) for acceptable formats.")
-                    employee_html, employee_csv, employee_json = craft_employees(employees, None)
-            else:
-                employee_html, employee_csv, employee_json = craft_employees(employees, None)
-    else:
-        print os.path.abspath(args.empspy)
-        perror("No such file or directory: '{}'".format(args.empspy))
+	if employees:
+		#output employees
+		for name, title in employees.iteritems():
+			print "{} {}".format(name, title[:50].replace('&amp;', '&'))
+
+		#craft emails
+		emails = create_emails(employees, domain, email)
 
-#output
-if args.html:
-    if tech_html or employee_html:
-        if tech_html and employee_html:
-            craft_html(args.company, tech_html, employee_html, args.html)
-        elif tech_html and not employee_html:
-            craft_html(args.company, tech_html, None, args.html)
-        else:
-            craft_html(args.company, None, employee_html, args.html)
-if args.csv:
-    if tech_csv or employee_csv:
-        if tech_csv and employee_csv:
-            craft_csv(tech_csv, employee_csv, args.csv)
-        elif tech_csv and not employee_csv:
-            craft_csv(tech_csv, None, args.csv)
-        else:
-            craft_csv(None, employee_csv, args.csv)
-if args.json:
-    if tech_json or employee_json:
-        if tech_json and employee_json:
-            craft_json(tech_json, employee_json, args.json)
-        elif tech_json and not employee_json:
-            craft_json(tech_json, None, args.json)
-        else:
-            craft_json(None, employee_json, args.json)
+		if emails:
+			#output emails
+			print "\nEmails crafted\n".format(len(emails.keys()))
+			for name, email in emails.items():
+				print email
 
-print "Completed in {:.1f}s".format(time.time()-stime)
+	#export results
+	if args.html:
+		output("html", args.html, args.company, domain, employees, emails)
+	if args.xml:
+		output("xml", args.xml, args.company, domain, employees, emails)
+	if args.json:
+		output("json", args.json, args.company, domain, employees, emails)
+	if args.csv:
+		output("csv", args.csv, args.company, domain, employees, emails)
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2016 Jonathan Broche
+Copyright (c) 2018 Leap Security
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/README.md b/README.md
@@ -3,50 +3,41 @@
 ## Introduction
 -----
 
-InSpy is a python based LinkedIn enumeration tool. Inspy has two functionalities: TechSpy and EmpSpy.
+InSpy is a Python-based LinkedIn enumeration tool.
 
-- TechSpy - Crawls LinkedIn job listings for technologies used by the provided company. InSpy attempts to identify technologies by matching job descriptions to keywords from a new line delimited file.
-
-- EmpSpy - Crawls LinkedIn for employees working at the provided company. InSpy searches for employees by title and/or departments from a new line delimited file. InSpy may also create emails for the identified employees if the user specifies an email format.
+Version 3.0 automates domain and email format retrieval, and adds randomized headers and XML output support.
 
 ## Installation
 -----
 
 Run `pip install -r requirements.txt` within the cloned InSpy directory.
 
+Obtain an API key from [HunterIO](https://hunter.io/) and insert it into the `hunterapi` variable within InSpy.py (line 29).
+
 ## Help
 -----
 
 ```
-InSpy - A LinkedIn enumeration tool by Jonathan Broche (@jonathanbroche)
+InSpy - A LinkedIn enumeration tool by Jonathan Broche (@LeapSecurity)
 
 positional arguments:
-  company               Company name to use for tasks.
+  company          Company name to use for tasks.
 
 optional arguments:
-  -h, --help            show this help message and exit
-  -v, --version         show program's version number and exit
-
-Technology Search:
-  --techspy [file]      Crawl LinkedIn job listings for technologies used by
-                        the company. Technologies imported from a new line
-                        delimited file. [Default: tech-list-small.txt]
-  --limit int           Limit the number of job listings to crawl. [Default:
-                        50]
-
-Employee Harvesting:
-  --empspy [file]       Discover employees by title and/or department. Titles
-                        and departments are imported from a new line delimited
-                        file. [Default: title-list-small.txt]
-  --emailformat string  Create email addresses for discovered employees using
-                        a known format. [Accepted Formats: first.last@xyz.com,
-                        last.first@xyz.com, first_last@xyz.com, last_first@xyz.com, 
-                        firstl@xyz.com, lfirst@xyz.com,
-                        flast@xyz.com, lastf@xyz.com, first@xyz.com,
-                        last@xyz.com]
+  -h, --help       show this help message and exit
+  -v, --version    show program's version number and exit
+  --domain DOMAIN  Company domain to use for searching.
+  --email EMAIL    Email format to create email addresses with. [Accepted
+                   Formats: first.last@xyz.com, last.first@xyz.com,
+                   firstl@xyz.com, lfirst@xyz.com, flast@xyz.com,
+                   lastf@xyz.com, first@xyz.com, last@xyz.com]
+  --titles [file]  Discover employees by title and/or department. Titles and
+                   departments are imported from a new line delimited file.
+                   [Default: title-list-small.txt]
 
 Output Options:
-  --html file           Print results in HTML file.
-  --csv file            Print results in CSV format.
-  --json file           Print results in JSON.
+  --html file      Print results in HTML file.
+  --csv file       Print results in CSV format.
+  --json file      Print results in JSON.
+  --xml file       Print results in XML.
 ```
diff --git a/lib/__init__.pyc b/lib/__init__.pyc
diff --git a/lib/crawler.py b/lib/crawler.py
diff --git a/lib/crawler.pyc b/lib/crawler.pyc