Skip to content

Commit

Permalink
Merge pull request #1 from dmdhrumilmistry/implement-mass-check
Browse files Browse the repository at this point in the history
implement async email checker
  • Loading branch information
dmdhrumilmistry committed Nov 20, 2023
2 parents 278ed36 + 49ac0c8 commit 1f62546
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 13 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,7 @@ poetry.toml

# LSP config files
pyrightconfig.json

# breach-check data
*.txt
output_*.json
33 changes: 33 additions & 0 deletions breach_check/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from argparse import ArgumentParser
from breach_check.breach import BreachChecker
from breach_check.logger import console
from breach_check.utils import generate_unique_filename, extract_emails, write_json_file
from asyncio import run
from sys import exit


def main():
    """Command-line entry point for breach-check.

    Parses ``-i/--input`` (required) and ``-o/--output`` (defaults to a
    freshly generated timestamped file name), loads the emails from the
    input file, runs the asynchronous mass check and stores the results as
    JSON. When the results cannot be written they are printed to the
    console instead. Exits with status -1 when no emails could be loaded.
    """
    cli = ArgumentParser('breach-check')
    cli.add_argument(
        '-i', '--input',
        dest='input_file',
        type=str,
        required=True,
        help='input file containing emails on each line',
    )
    cli.add_argument(
        '-o', '--output',
        dest='output_file',
        type=str,
        required=False,
        default=generate_unique_filename(),
        help='output json file path',
    )
    options = cli.parse_args()

    emails = extract_emails(options.input_file)
    if not emails:
        # missing input file or nothing to check
        exit(-1)

    checker = BreachChecker()
    results = run(checker.mass_check(emails=emails))

    saved = write_json_file(options.output_file, results)
    if saved:
        return

    # writing failed: show the results on the console so they are not lost
    console.print('Results:')
    console.print(results)


# Run the CLI only when this module is executed directly, not when imported.
if __name__ == '__main__':
    main()
66 changes: 55 additions & 11 deletions breach_check/breach.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from aiohttp.client_exceptions import ClientProxyConnectionError
from asyncio import run
from asyncio import run, ensure_future, gather
from breach_check.http import AsyncRequests
from breach_check.logger import logger
from breach_check.logger import logger, console
from json import loads as json_loads
from rich.progress import Progress, TaskID
from re import compile
Expand All @@ -14,6 +14,9 @@ def __init__(self, rate_limit: int | None = None, delay: float | None = None, he
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
}

self.progress = Progress(console=console)
self.progress_task_id: TaskID | None = None

self._http_client = AsyncRequests(
rate_limit=rate_limit,
delay=delay,
Expand All @@ -28,12 +31,42 @@ async def mass_check(self, emails: list[str] | None = None):
if not emails or len(emails) == 0:
return []

async def check(self, email: str | None = None) -> list[dict]:
self.progress.start()
self.progress_task_id = self.progress.add_task(
'[orange] Checking for Breaches:',
total=len(emails)
)

tasks = []
for email in emails:
tasks.append(
ensure_future(
self.check(email)
)
)

try:
results = await gather(*tasks)

self.progress.stop()
return results
except Exception as e:
logger.error(
f'[*] Exception occurred while gathering results: {e}',
stack_info=True
)
return []

async def check(self, email: str | None = None) -> dict:
if not email:
logger.warning('email param cannot be None')
return []
return {}

res_data = []
res_data = {
'email': email,
'breaches': [],
'total': None
}
try:
email_validator = compile(r"^[^@\s']+@[^@\s']+\.[^@\s']+$")
if email_validator.match(email):
Expand All @@ -51,22 +84,33 @@ async def check(self, email: str | None = None) -> list[dict]:
is_success = res_body.get('success', False)

if status_code == 200 and is_success:
res_data = res_body.get('breaches')
breaches = res_body.get('breaches', [])
total = res_body.get('total', -1)
res_data['breaches'] = breaches
res_data['total'] = total

elif status_code == 429:
logger.warning('Rate Limited')

else:
logger.error(f'Failed with status code: {status_code}')
logger.error(response, res_body)

else:
logger.warning(f'{email} is not a valid email')

# advance progress bar
if self.progress_task_id != None:
self.progress.update(self.progress_task_id,
advance=1, refresh=True)
else:
logger.error('No Progress Bar Task Found!')

if self.progress and self.progress.finished:
self.progress.stop()

return res_data
except ConnectionRefusedError:
logger.error('Connection Failed! Server refused Connection!!')
except ClientProxyConnectionError as e:
logger.error(f'Proxy Connection Error: {e}')


# Ad-hoc manual run: check one sample address and log whatever check() returns.
if __name__ == '__main__':
    result = run(BreachChecker().check(email='admin@example.com'))
    logger.info(result)
3 changes: 2 additions & 1 deletion breach_check/http.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from aiohttp import ClientSession, ClientResponse, TCPConnector
from aiohttp import ClientSession, TCPConnector
from os import name as os_name


import asyncio
import aiohttp.resolver


aiohttp.resolver.DefaultResolver = aiohttp.resolver.AsyncResolver
if os_name == 'nt':
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
Expand Down
43 changes: 43 additions & 0 deletions breach_check/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from breach_check.logger import logger
from datetime import datetime
from json import dumps as json_dumps, JSONDecodeError
from os.path import isfile


def generate_unique_filename():
    """Build a timestamped default name for the JSON results file.

    Returns:
        A string of the form ``output_<YearMonthDayHourMinuteSecond>.json``
        based on ``datetime.now()``. Two calls within the same second yield
        the same name.
    """
    stamp = f"{datetime.now():%Y%m%d%H%M%S}"
    return f"output_{stamp}.json"


def extract_emails(file_path: str) -> list[str] | None:
if not isfile(file_path):
logger.error(f'Input File with Emails Not Found: {file_path}')
return

with open(file_path, 'r') as f:
emails = [email.strip() for email in f.read().splitlines()]

return emails


def write_json_file(file_path: str, json_data) -> bool:
    """Serialize ``json_data`` to JSON and write it to ``file_path``.

    An existing file at ``file_path`` is overwritten (a warning is logged
    first). Failures are logged and reported through the return value
    rather than raised, so the caller can fall back to another output.

    Args:
        file_path: destination path of the JSON file.
        json_data: any object ``json.dumps`` can serialize.

    Returns:
        ``True`` when the data was written, ``False`` when it could not be
        serialized or the file could not be written.
    """
    if isfile(file_path):
        logger.warning(f'{file_path} data will be overwritten')

    try:
        serialized = json_dumps(json_data)
    except (TypeError, ValueError):
        # json.dumps reports unserializable objects and circular references
        # with TypeError/ValueError; JSONDecodeError is only raised when
        # decoding, so catching it here never matched anything
        logger.error('Invalid JSON Data')
        return False
    except Exception as e:
        logger.error(f'Exception: {e}')
        return False

    try:
        with open(file_path, 'w') as f:
            f.write(serialized)
    except OSError as e:
        # e.g. missing directory or no permission: honour the bool contract
        # instead of crashing after the results have been computed
        logger.error(f'Unable to write {file_path}: {e}')
        return False

    logger.info(f'data written to {file_path} successfully')
    return True
12 changes: 11 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool.poetry]
name = "breach-check"
version = "0.1.0"
description = "check for dark web breaches for provided emails"
description = "check for data breaches for provided emails"
authors = ["Dhrumil Mistry <56185972+dmdhrumilmistry@users.noreply.github.com>"]
license = "MIT"
readme = "README.md"
Expand All @@ -14,6 +14,16 @@ rich = "^13.7.0"
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.3"

[tool.poetry.urls]
"Home" = "https://github.com/dmdhrumilmistry/breach-check"
"Bug Tracker" = "https://github.com/dmdhrumilmistry/breach-check/issues"
"Support" = "https://github.com/sponsors/dmdhrumilmistry/"
"PayPal" = "https://paypal.me/dmdhrumilmistry"

[tool.poetry.scripts]
breach-check = "breach_check.__main__:main"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

0 comments on commit 1f62546

Please sign in to comment.