Skip to content

Commit

Permalink
InSpy 3.0 commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jobroche committed Aug 14, 2018
1 parent 54a5398 commit 161ca95
Show file tree
Hide file tree
Showing 21 changed files with 332 additions and 508 deletions.
163 changes: 60 additions & 103 deletions InSpy.py
Original file line number Diff line number Diff line change
@@ -1,125 +1,82 @@
#!/usr/bin/env python2
# Copyright (c) 2016 Jonathan Broche (@g0jhonny)
# Copyright (c) 2018 Jonathan Broche (@LeapSecurity)

from lib.logger import *
from lib.soupify import *
import argparse, sys, os
from lib.http import *
from lib.workbench import *
from lib.crawler import *
import os, argparse, sys, time
from lib.soup import *
from lib.export import *
from lib.logger import *

parser = argparse.ArgumentParser(description='InSpy - A LinkedIn enumeration tool by Jonathan Broche (@g0jhonny)', version="2.0.2")
parser.add_argument('company', help="Company name to use for tasks.")
techgroup = parser.add_argument_group(title="Technology Search")
techgroup.add_argument('--techspy', metavar='file', const="wordlists/tech-list-small.txt", nargs='?', help="Crawl LinkedIn job listings for technologies used by the company. Technologies imported from a new line delimited file. [Default: tech-list-small.txt]")
techgroup.add_argument('--limit', metavar='int', type=int, default=50, help="Limit the number of job listings to crawl. [Default: 50]")
empgroup = parser.add_argument_group(title="Employee Harvesting")
empgroup.add_argument('--empspy', metavar='file', const="wordlists/title-list-small.txt", nargs='?', help="Discover employees by title and/or department. Titles and departments are imported from a new line delimited file. [Default: title-list-small.txt]")
empgroup.add_argument('--emailformat', metavar='string', help="Create email addresses for discovered employees using a known format. [Accepted Formats: first.last@xyz.com, last.first@xyz.com, firstl@xyz.com, lfirst@xyz.com, flast@xyz.com, lastf@xyz.com, first@xyz.com, last@xyz.com]")

# Command-line interface: a positional company name, optional domain and
# email-format overrides, a title wordlist, and one output file per format.
parser = argparse.ArgumentParser(description='InSpy - A LinkedIn enumeration tool by Jonathan Broche (@LeapSecurity)', version="3.0.0")
parser.add_argument('company', help="Company name to use for tasks.")
parser.add_argument('--domain', help="Company domain to use for searching.")
parser.add_argument('--email', help="Email format to create email addresses with. [Accepted Formats: first.last@xyz.com, last.first@xyz.com, firstl@xyz.com, lfirst@xyz.com, flast@xyz.com, lastf@xyz.com, first@xyz.com, last@xyz.com]")
# With nargs='?' a bare `--titles` (no file given) takes its value from
# `const`, not `default`; without const it would become None and the later
# os.path.abspath(args.titles) call would fail. Point both at the same list.
parser.add_argument('--titles', metavar='file', default="wordlists/title-list-small.txt", const="wordlists/title-list-small.txt", nargs='?', help="Discover employees by title and/or department. Titles and departments are imported from a new line delimited file. [Default: title-list-small.txt]")
outgroup = parser.add_argument_group(title="Output Options")
outgroup.add_argument('--html', metavar='file', help="Print results in HTML file.")
outgroup.add_argument('--csv', metavar='file', help="Print results in CSV format.")
outgroup.add_argument('--json', metavar='file', help="Print results in JSON.")
outgroup.add_argument('--xml', metavar='file', help="Print results in XML.")

if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)

args = parser.parse_args()
start_logger(args.company)
hunterapi = "" #insert hunterio api key here

print "\nInSpy {}\n".format(parser.version)

if not args.techspy and not args.empspy:
print "You didn't provide any work for me to do."
sys.exit(1)

stime = time.time()
tech_html, employee_html, tech_csv, employee_csv, tech_json, employee_json = [], [], [], [], [], []

if args.techspy:
if os.path.exists(os.path.abspath(args.techspy)):
initial_crawl = crawl_jobs(args.company)
if initial_crawl:
soup = soupify(initial_crawl)
job_links = []
for link in get_job_links(soup, args.company):
if len(job_links) < args.limit:
job_links.append(link)
if len(job_links) != args.limit:
page_links = get_page_links(soup)
for page in range(len(page_links)):
if len(job_links) == args.limit: break
urlcrawl = crawl_url(page_links[page])
if urlcrawl:
for link in get_job_links(soupify(urlcrawl), args.company):
if len(job_links) < args.limit:
job_links.append(link)
print "\nInSpy {}".format(parser.version)

pstatus("{} Jobs identified".format(len(job_links)))
if job_links:
techs = {}
for job in range(len(job_links)):
jobresponse = crawl_url(job_links[job])
if jobresponse:
jobsoup = soupify(jobresponse)
description = get_job_description(jobsoup)
matches = identify_tech(description, os.path.abspath(args.techspy))
if matches:
title = get_job_title(jobsoup)
techs[title] = {job_links[job]:matches}
# Resolve the company domain and the email format. A value given on the
# command line always wins; only the missing one is looked up. Previously,
# passing both --domain and --email fell into the "fully automate" branch
# and both user-supplied values were ignored.
if args.domain:
    domain = args.domain
else:
    domain = get_domain(args.company)  # search clearbit for domain

if args.email:
    email = args.email
elif domain:
    email = get_email_format(domain, hunterapi)  # search hunterio for email format
    # The lookup may come back empty (the original guarded one call site
    # this way), so only strip the "{...}" placeholders from a real result.
    if email:
        email = email.replace("{", "").replace("}", "")
else:
    # No domain could be determined, so there is nothing to query.
    email = None

tech_html, tech_csv, tech_json = craft_tech(techs)
else:
perror("No such file or directory: '{}'".format(args.techspy))
if domain and email:
print "\nDomain: {}, Email Format: {}\n".format(domain, email)
employees = {}

if args.empspy:
if os.path.exists(os.path.abspath(args.empspy)):
employees = {}
emails = []
for response in crawl_employees(args.company, os.path.abspath(args.empspy)):
for name, title in get_employees(soupify(response)).items():
if args.company.lower() in title.lower():
if not name in employees:
employees[name] = title
if os.path.exists(os.path.abspath(args.titles)):
for response in search_linkedin(args.company, os.path.abspath(args.titles)):
for name, title in get_employees(soupify(response)).items():
if args.company.lower() in title.lower():
if not name in employees:
employees[name] = title
print "\n{} Employees identified".format(len(employees.keys()))
else:
print os.path.abspath(args.titles)
print "No such file or directory: '{}'".format(args.titles)

pstatus("{} Employees identified".format(len(employees.keys())))
if employees:
if args.emailformat:
if args.emailformat[:args.emailformat.find('@')] in ['first.last', 'last.first', 'firstlast', 'lastfirst', 'first_last', 'last_first', 'first', 'last', 'firstl', 'lfirst', 'flast', 'lastf']:
employee_html, employee_csv, employee_json = craft_employees(employees, args.emailformat)
else:
pwarning("You didn't provide a valid e-mail format. See help (-h) for acceptable formats.")
employee_html, employee_csv, employee_json = craft_employees(employees, None)
else:
employee_html, employee_csv, employee_json = craft_employees(employees, None)
else:
print os.path.abspath(args.empspy)
perror("No such file or directory: '{}'".format(args.empspy))
if employees:
#output employees
for name, title in employees.iteritems():
print "{} {}".format(name, title[:50].replace('&amp;', '&'))

#craft emails
emails = create_emails(employees, domain, email)

#output
if args.html:
if tech_html or employee_html:
if tech_html and employee_html:
craft_html(args.company, tech_html, employee_html, args.html)
elif tech_html and not employee_html:
craft_html(args.company, tech_html, None, args.html)
else:
craft_html(args.company, None, employee_html, args.html)
if args.csv:
if tech_csv or employee_csv:
if tech_csv and employee_csv:
craft_csv(tech_csv, employee_csv, args.csv)
elif tech_csv and not employee_csv:
craft_csv(tech_csv, None, args.csv)
else:
craft_csv(None, employee_csv, args.csv)
if args.json:
if tech_json or employee_json:
if tech_json and employee_json:
craft_json(tech_json, employee_json, args.json)
elif tech_json and not employee_json:
craft_json(tech_json, None, args.json)
else:
craft_json(None, employee_json, args.json)
if emails:
#output emails
print "\nEmails crafted\n".format(len(emails.keys()))
for name, email in emails.items():
print email

print "Completed in {:.1f}s".format(time.time()-stime)
#export results
if args.html:
output("html", args.html, args.company, domain, employees, emails)
if args.xml:
output("xml", args.xml, args.company, domain, employees, emails)
if args.json:
output("json", args.json, args.company, domain, employees, emails)
if args.csv:
output("csv", args.csv, args.company, domain, employees, emails)
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright (c) 2016 Jonathan Broche
Copyright (c) 2018 Leap Security

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
49 changes: 20 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,50 +3,41 @@
## Introduction
-----

InSpy is a python based LinkedIn enumeration tool. Inspy has two functionalities: TechSpy and EmpSpy.
InSpy is a Python-based LinkedIn enumeration tool.

- TechSpy - Crawls LinkedIn job listings for technologies used by the provided company. InSpy attempts to identify technologies by matching job descriptions to keywords from a new line delimited file.

- EmpSpy - Crawls LinkedIn for employees working at the provided company. InSpy searches for employees by title and/or departments from a new line delimited file. InSpy may also create emails for the identified employees if the user specifies an email format.
Version 3.0 introduces the automation of domain and email retrieval in addition to randomized headers and XML output support.

## Installation
-----

Run `pip install -r requirements.txt` within the cloned InSpy directory.

Obtain an API key from [HunterIO](https://hunter.io/) and insert it into the `hunterapi` variable within InSpy.py (line 29).

## Help
-----

```
InSpy - A LinkedIn enumeration tool by Jonathan Broche (@jonathanbroche)
InSpy - A LinkedIn enumeration tool by Jonathan Broche (@LeapSecurity)
positional arguments:
company Company name to use for tasks.
company Company name to use for tasks.
optional arguments:
-h, --help show this help message and exit
-v, --version show program's version number and exit
Technology Search:
--techspy [file] Crawl LinkedIn job listings for technologies used by
the company. Technologies imported from a new line
delimited file. [Default: tech-list-small.txt]
--limit int Limit the number of job listings to crawl. [Default:
50]
Employee Harvesting:
--empspy [file] Discover employees by title and/or department. Titles
and departments are imported from a new line delimited
file. [Default: title-list-small.txt]
--emailformat string Create email addresses for discovered employees using
a known format. [Accepted Formats: first.last@xyz.com,
last.first@xyz.com, first_last@xyz.com, last_first@xyz.com,
firstl@xyz.com, lfirst@xyz.com,
flast@xyz.com, lastf@xyz.com, first@xyz.com,
last@xyz.com]
-h, --help show this help message and exit
-v, --version show program's version number and exit
--domain DOMAIN Company domain to use for searching.
--email EMAIL Email format to create email addresses with. [Accepted
Formats: first.last@xyz.com, last.first@xyz.com,
firstl@xyz.com, lfirst@xyz.com, flast@xyz.com,
lastf@xyz.com, first@xyz.com, last@xyz.com]
--titles [file] Discover employees by title and/or department. Titles and
departments are imported from a new line delimited file.
[Default: title-list-small.txt]
Output Options:
--html file Print results in HTML file.
--csv file Print results in CSV format.
--json file Print results in JSON.
--html file Print results in HTML file.
--csv file Print results in CSV format.
--json file Print results in JSON.
--xml file Print results in XML.
```
Binary file modified lib/__init__.pyc
Binary file not shown.
51 changes: 0 additions & 51 deletions lib/crawler.py

This file was deleted.

Binary file removed lib/crawler.pyc
Binary file not shown.
Loading

0 comments on commit 161ca95

Please sign in to comment.