From d886a9badd9d3ceccc806c5536e601b061f4ed78 Mon Sep 17 00:00:00 2001
From: kazet
Date: Thu, 23 Nov 2023 14:40:48 +0100
Subject: [PATCH] DomainExpirationScanner: more logging, timeouts accounting
 for the fact that we wait for quota, minor message fixes, etc. (#604)

---
 artemis/config.py                             |  4 +-
 artemis/module_base.py                        | 31 +++++++++++++++-
 artemis/modules/domain_expiration_scanner.py  | 37 +++++++++----------
 ...ate_close_domain_expiration_scanner.jinja2 |  2 +-
 artemis/reporting/severity.py                 |  1 +
 artemis/utils.py                              | 23 +++++++++++-
 docker-compose.yaml                           |  2 +-
 docker/Dockerfile                             |  6 ++-
 docker/karton.ini                             |  5 +++
 requirements.txt                              |  2 +-
 10 files changed, 83 insertions(+), 30 deletions(-)

diff --git a/artemis/config.py b/artemis/config.py
index e800c3edf..bf34f5bfa 100644
--- a/artemis/config.py
+++ b/artemis/config.py
@@ -100,7 +100,7 @@ class Locking:
             Locks are not permanent, because a service that has acquired a lock may get restarted or killed.
             This is the lock default expiry time.
             """,
-    ] = get_config("DEFAULT_LOCK_EXPIRY_SECONDS", default=3600, cast=int)
+    ] = get_config("DEFAULT_LOCK_EXPIRY_SECONDS", default=48 * 60 * 60, cast=int)
 
     class PublicSuffixes:
         ALLOW_SCANNING_PUBLIC_SUFFIXES: Annotated[
@@ -473,7 +473,7 @@ class WordPressBruter:
 class DomainExpirationScanner:
     DOMAIN_EXPIRATION_TIMEFRAME_DAYS: Annotated[
         int, "The scanner warns if the domain's expiration date falls within this time frame from now."
-    ] = get_config("DOMAIN_EXPIRATION_TIMEFRAME_DAYS", default=5, cast=int)
+    ] = get_config("DOMAIN_EXPIRATION_TIMEFRAME_DAYS", default=14, cast=int)
 
     @staticmethod
     def verify_each_variable_is_annotated() -> None:
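NOTE: for illustration, a minimal sketch of how such an env-backed setting
behaves (this is an assumed equivalent, not the project's actual get_config
implementation):

    import os

    def get_config(name, default=None, cast=str):
        # Prefer the environment variable; otherwise fall back to the default.
        raw = os.environ.get(name)
        return cast(raw) if raw is not None else default

    # Without an env override, the new default of 14 days applies:
    assert get_config("DOMAIN_EXPIRATION_TIMEFRAME_DAYS", default=14, cast=int) == 14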
diff --git a/artemis/module_base.py b/artemis/module_base.py
index 68e9e0cc7..9aa99d7bc 100644
--- a/artemis/module_base.py
+++ b/artemis/module_base.py
@@ -46,8 +46,15 @@ class ArtemisBase(Karton):
     # their IPs are already scanned, the actual batch size may be lower.
     task_max_batch_size = 1
 
+    timeout_seconds = Config.Limits.TASK_TIMEOUT_SECONDS
+
     lock_target = Config.Locking.LOCK_SCANNED_TARGETS
 
+    # Sometimes there are multiple modules that make use of a single resource, e.g. the
+    # whois database. This is the name of that resource - while one module holds the lock
+    # on it, no other module that uses the same resource will run.
+    resource_name_to_lock_before_scanning: Optional[str] = None
+
     def __init__(self, db: Optional[DB] = None, *args, **kwargs) -> None:  # type: ignore[no-untyped-def]
         super().__init__(*args, **kwargs)
         self.cache = RedisCache(REDIS, self.identity)
@@ -234,6 +241,23 @@ def run_multiple(self, tasks: List[Task]) -> None:
         raise NotImplementedError()
 
     def lock_and_internal_process_multiple(self, tasks: List[Task]) -> None:
+        if self.resource_name_to_lock_before_scanning:
+            resource_lock = ResourceLock(
+                REDIS,
+                f"resource-lock-{self.resource_name_to_lock_before_scanning}",
+                max_tries=Config.Locking.SCAN_DESTINATION_LOCK_MAX_TRIES,
+            )
+            try:
+                resource_lock.acquire()
+                self.log.info("Successfully locked resource %s", self.resource_name_to_lock_before_scanning)
+            except FailedToAcquireLockException:
+                self.log.info("Failed to lock resource %s", self.resource_name_to_lock_before_scanning)
+                for task in tasks:
+                    self.reschedule_task(task)
+                return
+        else:
+            resource_lock = None
+
         if self.lock_target:
             locks_acquired = []
             tasks_to_reschedule = []
@@ -288,6 +312,9 @@ def lock_and_internal_process_multiple(self, tasks: List[Task]) -> None:
             self._log_tasks(tasks)
         self.internal_process_multiple(tasks)
 
+        if resource_lock:
+            resource_lock.release()
+
     def internal_process_multiple(self, tasks: List[Task]) -> None:
         tasks_filtered = []
         for task in tasks:
@@ -350,10 +377,10 @@ def process_multiple(self, tasks: List[Task]) -> None:
 
         try:
             if self.batch_tasks:
-                timeout_decorator.timeout(Config.Limits.TASK_TIMEOUT_SECONDS)(lambda: self.run_multiple(tasks))()
+                timeout_decorator.timeout(self.timeout_seconds)(lambda: self.run_multiple(tasks))()
             else:
                 (task,) = tasks
-                timeout_decorator.timeout(Config.Limits.TASK_TIMEOUT_SECONDS)(lambda: self.run(task))()
+                timeout_decorator.timeout(self.timeout_seconds)(lambda: self.run(task))()
         except Exception:
             for task in tasks:
                 self.db.save_task_result(task=task, status=TaskStatus.ERROR, data=traceback.format_exc())
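NOTE: a hypothetical module opting into the new resource lock could look like
this; only the resource_name_to_lock_before_scanning and timeout_seconds
class attributes come from this patch, the module itself is made up:

    class ExampleWhoisBackedModule(ArtemisBase):  # hypothetical module
        identity = "example_whois_backed_module"
        filters = [{"type": TaskType.DOMAIN.value}]

        # While this module holds the "whois" resource lock, other modules
        # that declare the same resource name get their tasks rescheduled.
        resource_name_to_lock_before_scanning = "whois"

        # Per-module override; defaults to Config.Limits.TASK_TIMEOUT_SECONDS.
        timeout_seconds = 2 * 3600

        def run(self, current_task: Task) -> None:
            ...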
diff --git a/artemis/modules/domain_expiration_scanner.py b/artemis/modules/domain_expiration_scanner.py
index 08363ab9c..ba50ebb6d 100644
--- a/artemis/modules/domain_expiration_scanner.py
+++ b/artemis/modules/domain_expiration_scanner.py
@@ -1,15 +1,14 @@
 #!/usr/bin/env python3
 import datetime
-import time
 from typing import Any, Dict, Optional
 
 from karton.core import Task
-from whois import Domain, WhoisQuotaExceeded, query  # type: ignore
 
 from artemis.binds import TaskStatus, TaskType
 from artemis.config import Config
 from artemis.domains import is_main_domain
 from artemis.module_base import ArtemisBase
+from artemis.utils import perform_whois_or_sleep
 
 
 class DomainExpirationScanner(ArtemisBase):
@@ -19,6 +18,11 @@ class DomainExpirationScanner(ArtemisBase):
     identity = "domain_expiration_scanner"
     filters = [{"type": TaskType.DOMAIN.value}]
 
+    resource_name_to_lock_before_scanning = "whois"
+
+    # As the logic sometimes requires waiting 24 hours for the whois quota to be
+    # renewed, set the timeout to 24 hours + 1 hour.
+    timeout_seconds = (24 + 1) * 3600
 
     def run(self, current_task: Task) -> None:
         domain = current_task.get_payload(TaskType.DOMAIN)
@@ -26,27 +30,20 @@ def run(self, current_task: Task) -> None:
         status = TaskStatus.OK
         status_reason = None
         if is_main_domain(domain):
-            try:
-                domain_data = self._query_whois(domain=domain)
-            except WhoisQuotaExceeded:
-                time.sleep(24 * 60 * 60)
-                domain_data = self._query_whois(domain=domain)
+            domain_data = perform_whois_or_sleep(domain=domain, logger=self.log)
 
-            expiry_date = domain_data.expiration_date
-            result = self._prepare_expiration_data(expiration_date=expiry_date, result=result)
+            if domain_data:
+                expiry_date = domain_data.expiration_date
+                result = self._prepare_expiration_data(expiration_date=expiry_date, result=result)
 
-            if "close_expiration_date" in result:
-                status = TaskStatus.INTERESTING
-                status_reason = self._prepare_expiration_status_reason(
-                    days_to_expire=result["days_to_expire"], expiration_date=result["expiration_date"]
-                )
+                if "close_expiration_date" in result:
+                    status = TaskStatus.INTERESTING
+                    status_reason = self._prepare_expiration_status_reason(
+                        days_to_expire=result["days_to_expire"], expiration_date=result["expiration_date"]
+                    )
 
         self.db.save_task_result(task=current_task, status=status, status_reason=status_reason, data=result)
 
-    @staticmethod
-    def _query_whois(domain: str) -> Domain:
-        return query(domain)
-
     @staticmethod
     def _prepare_expiration_data(
         expiration_date: Optional[datetime.datetime], result: Dict[str, Any]
@@ -64,9 +61,9 @@ def _prepare_expiration_data(
     @staticmethod
     def _prepare_expiration_status_reason(days_to_expire: int, expiration_date: datetime.datetime) -> str:
         return (
-            f"Scanned domain will expire in {days_to_expire} days - (on {expiration_date})."
+            f"Scanned domain will expire in {days_to_expire} days - on {expiration_date}."
             if days_to_expire != 1
-            else f"Scanned domain will expire in {days_to_expire} day - (on {expiration_date})."
+            else f"Scanned domain will expire in {days_to_expire} day - on {expiration_date}."
         )
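NOTE: the body of _prepare_expiration_data lies mostly outside this hunk; the
arithmetic it implies is roughly the following (an assumed sketch, not code
from the patch):

    import datetime

    def days_until_expiry(expiration_date: datetime.datetime) -> int:
        return (expiration_date - datetime.datetime.now()).days

    # With DOMAIN_EXPIRATION_TIMEFRAME_DAYS=14, a domain expiring 10 days from
    # now falls within the timeframe and is flagged as close_expiration_date.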
diff --git a/artemis/reporting/modules/domain_expiration_scanner/template_close_domain_expiration_scanner.jinja2 b/artemis/reporting/modules/domain_expiration_scanner/template_close_domain_expiration_scanner.jinja2
index ac5ed0a38..86a30d8aa 100644
--- a/artemis/reporting/modules/domain_expiration_scanner/template_close_domain_expiration_scanner.jinja2
+++ b/artemis/reporting/modules/domain_expiration_scanner/template_close_domain_expiration_scanner.jinja2
@@ -4,7 +4,7 @@
 {% for report in data.reports %}
     {% if report.report_type == "close_domain_expiration_date" %}
         <li>
-            {{ report.target }} - {% trans %}will expire on{% endtrans %} {{ report.additional_data["expiration_date"] }}
+            {{ report.target }} - {% trans %}will expire on{% endtrans %} {{ report.additional_data["expiration_date"] }}.
         </li>
     {% endif %}
 {% endfor %}
diff --git a/artemis/reporting/severity.py b/artemis/reporting/severity.py
index 786f329a3..d3e4ceddb 100644
--- a/artemis/reporting/severity.py
+++ b/artemis/reporting/severity.py
@@ -15,6 +15,7 @@ class Severity(str, Enum):
     ReportType("subdomain_takeover_possible"): Severity.HIGH,
     ReportType("sql_injection"): Severity.HIGH,
     ReportType("closed_wordpress_plugin"): Severity.HIGH,
+    ReportType("close_domain_expiration_date"): Severity.HIGH,
     ReportType("exposed_database_with_easy_password"): Severity.HIGH,
     ReportType("exposed_version_control_folder"): Severity.HIGH,
     ReportType("exposed_version_control_folder_with_credentials"): Severity.HIGH,
diff --git a/artemis/utils.py b/artemis/utils.py
index 0f4e1d1f6..b641b6ab4 100644
--- a/artemis/utils.py
+++ b/artemis/utils.py
@@ -3,7 +3,10 @@
 import time
 import urllib.parse
 from ipaddress import ip_address
-from typing import Any, Callable, List
+from typing import Any, Callable, List, Optional
+
+from whoisdomain import Domain, WhoisQuotaExceeded  # type: ignore
+from whoisdomain import query as whois_query
 
 from artemis.config import Config
 
@@ -21,6 +24,24 @@ def check_output_log_on_error(command: List[str], logger: logging.Logger, **kwar
         raise
 
 
+def perform_whois_or_sleep(domain: str, logger: logging.Logger) -> Optional[Domain]:
+    try:
+        domain_data = whois_query(domain=domain)
+        logger.info(
+            "Successful whois query for %s expiry=%s", domain, domain_data.expiration_date if domain_data else None
+        )
+    except WhoisQuotaExceeded:
+        logger.info("Quota exceeded for whois query for %s, sleeping 24 hours", domain)
+        time.sleep(24 * 60 * 60)
+        domain_data = whois_query(domain=domain)
+        logger.info(
+            "Successful whois query for %s after retry expiry=%s",
+            domain,
+            domain_data.expiration_date if domain_data else None,
+        )
+    return domain_data
+
+
 def build_logger(name: str) -> logging.Logger:
     logger = logging.getLogger(name)
     logger.setLevel(logging.INFO)
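NOTE: an illustrative caller of the new helper defined above (the call may
block for 24 hours when the whois quota is exhausted):

    import logging

    from artemis.utils import perform_whois_or_sleep

    logger = logging.getLogger("whois-example")
    domain_data = perform_whois_or_sleep(domain="example.com", logger=logger)
    if domain_data:  # the query may return None
        print(domain_data.expiration_date)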
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 9a6f41b43..d161b9861 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -345,7 +345,7 @@ services:
     depends_on: [ karton-logger ]
     env_file: .env
     restart: always
-    volumes: [ "./docker/karton.ini:/etc/karton/karton.ini" ]
+    volumes: ["./docker/karton.ini:/etc/karton/karton.ini", "${DOCKER_COMPOSE_ADDITIONAL_SHARED_DIRECTORY:-./shared}:/shared/"]
 
 volumes:
   data-mongodb:
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 2d5a0e41d..b0b177879 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -3,8 +3,10 @@ FROM python:3.11-alpine3.18
 COPY docker/wait-for-it.sh /wait-for-it.sh
 ARG ADDITIONAL_REQUIREMENTS
 
-RUN apk add --no-cache --virtual .build-deps go gcc git libc-dev make libffi-dev libpcap-dev postgresql-dev whois && \
-    apk add --no-cache bash libpcap libpq git subversion
+RUN apk add --no-cache --virtual .build-deps go gcc git libc-dev make libffi-dev libpcap-dev postgresql-dev && \
+    # Installing whois here is important - without it we would fall back to the busybox whois,
+    # whose different output format makes the https://pypi.org/project/whoisdomain/ regexes fail.
+    apk add --no-cache bash libpcap libpq git subversion whois
 
 RUN GOBIN=/usr/local/bin/ go install github.com/projectdiscovery/naabu/v2/cmd/naabu@v2.1.6 && \
     GOBIN=/usr/local/bin/ go install github.com/praetorian-inc/fingerprintx/cmd/fingerprintx@v1.1.9 && \
     GOBIN=/usr/local/bin/ go install github.com/lc/gau/v2/cmd/gau@v2.1.2
diff --git a/docker/karton.ini b/docker/karton.ini
index ddf43ace0..d1355aec5 100644
--- a/docker/karton.ini
+++ b/docker/karton.ini
@@ -1,3 +1,8 @@
+[system]
+# 2 days
+task_dispatched_timeout=172800
+task_started_timeout=172800
+
 [minio]
 address=minio:9000
 bucket=artemis
diff --git a/requirements.txt b/requirements.txt
index 54ee0c472..0a309bbf3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -45,4 +45,4 @@ typing-extensions==4.8.0
 urllib3==1.26.18
 uvicorn==0.24.0.post1
 validators==0.22.0
-whois==0.9.27
+whoisdomain==1.20231102.1
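NOTE: how the timeout values in this patch relate to each other (that the
queue-level timeouts are meant to exceed the longest possible module run is
an assumption, not stated in the patch):

    module_timeout = (24 + 1) * 3600  # 90000 s - DomainExpirationScanner.timeout_seconds
    lock_expiry = 48 * 60 * 60        # 172800 s - new DEFAULT_LOCK_EXPIRY_SECONDS
    karton_timeout = 172800           # task_dispatched_timeout / task_started_timeout

    assert module_timeout < lock_expiry == karton_timeout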