diff --git a/artemis/config.py b/artemis/config.py index e800c3edf..bf34f5bfa 100644 --- a/artemis/config.py +++ b/artemis/config.py @@ -100,7 +100,7 @@ class Locking: Locks are not permanent, because a service that has acquired a lock may get restarted or killed. This is the lock default expiry time. """, - ] = get_config("DEFAULT_LOCK_EXPIRY_SECONDS", default=3600, cast=int) + ] = get_config("DEFAULT_LOCK_EXPIRY_SECONDS", default=48 * 60 * 60, cast=int) class PublicSuffixes: ALLOW_SCANNING_PUBLIC_SUFFIXES: Annotated[ @@ -473,7 +473,7 @@ class WordPressBruter: class DomainExpirationScanner: DOMAIN_EXPIRATION_TIMEFRAME_DAYS: Annotated[ int, "The scanner warns if the domain's expiration date falls within this time frame from now." - ] = get_config("DOMAIN_EXPIRATION_TIMEFRAME_DAYS", default=5, cast=int) + ] = get_config("DOMAIN_EXPIRATION_TIMEFRAME_DAYS", default=14, cast=int) @staticmethod def verify_each_variable_is_annotated() -> None: diff --git a/artemis/module_base.py b/artemis/module_base.py index 68e9e0cc7..9aa99d7bc 100644 --- a/artemis/module_base.py +++ b/artemis/module_base.py @@ -46,8 +46,15 @@ class ArtemisBase(Karton): # their IPs are already scanned, the actual batch size may be lower. task_max_batch_size = 1 + timeout_seconds = Config.Limits.TASK_TIMEOUT_SECONDS + lock_target = Config.Locking.LOCK_SCANNED_TARGETS + # Sometimes there are multiple modules that make use of a resource, e.g. whois database. + # This is the name of the resource - if a module locks it, no other module using this + # resource can use it. + resource_name_to_lock_before_scanning: Optional[str] = None + def __init__(self, db: Optional[DB] = None, *args, **kwargs) -> None: # type: ignore[no-untyped-def] super().__init__(*args, **kwargs) self.cache = RedisCache(REDIS, self.identity) @@ -234,6 +241,23 @@ def run_multiple(self, tasks: List[Task]) -> None: raise NotImplementedError() def lock_and_internal_process_multiple(self, tasks: List[Task]) -> None: + if self.resource_name_to_lock_before_scanning: + resource_lock = ResourceLock( + REDIS, + f"resource-lock-{self.resource_name_to_lock_before_scanning}", + max_tries=Config.Locking.SCAN_DESTINATION_LOCK_MAX_TRIES, + ) + try: + resource_lock.acquire() + self.log.info("Succeeded to lock resource %s", self.resource_name_to_lock_before_scanning) + except FailedToAcquireLockException: + self.log.info("Failed to lock resource %s", self.resource_name_to_lock_before_scanning) + for task in tasks: + self.reschedule_task(task) + return + else: + resource_lock = None + if self.lock_target: locks_acquired = [] tasks_to_reschedule = [] @@ -288,6 +312,9 @@ def lock_and_internal_process_multiple(self, tasks: List[Task]) -> None: self._log_tasks(tasks) self.internal_process_multiple(tasks) + if resource_lock: + resource_lock.release() + def internal_process_multiple(self, tasks: List[Task]) -> None: tasks_filtered = [] for task in tasks: @@ -350,10 +377,10 @@ def process_multiple(self, tasks: List[Task]) -> None: try: if self.batch_tasks: - timeout_decorator.timeout(Config.Limits.TASK_TIMEOUT_SECONDS)(lambda: self.run_multiple(tasks))() + timeout_decorator.timeout(self.timeout_seconds)(lambda: self.run_multiple(tasks))() else: (task,) = tasks - timeout_decorator.timeout(Config.Limits.TASK_TIMEOUT_SECONDS)(lambda: self.run(task))() + timeout_decorator.timeout(self.timeout_seconds)(lambda: self.run(task))() except Exception: for task in tasks: self.db.save_task_result(task=task, status=TaskStatus.ERROR, data=traceback.format_exc()) diff --git a/artemis/modules/domain_expiration_scanner.py b/artemis/modules/domain_expiration_scanner.py index 08363ab9c..ba50ebb6d 100644 --- a/artemis/modules/domain_expiration_scanner.py +++ b/artemis/modules/domain_expiration_scanner.py @@ -1,15 +1,14 @@ #!/usr/bin/env python3 import datetime -import time from typing import Any, Dict, Optional from karton.core import Task -from whois import Domain, WhoisQuotaExceeded, query # type: ignore from artemis.binds import TaskStatus, TaskType from artemis.config import Config from artemis.domains import is_main_domain from artemis.module_base import ArtemisBase +from artemis.utils import perform_whois_or_sleep class DomainExpirationScanner(ArtemisBase): @@ -19,6 +18,11 @@ class DomainExpirationScanner(ArtemisBase): identity = "domain_expiration_scanner" filters = [{"type": TaskType.DOMAIN.value}] + resource_name_to_lock_before_scanning = "whois" + + # As the logic sometimes requires waiting 24 hours for the quota to be renewed, let's + # set the timeout for 24 hours + 1 hour. + timeout_seconds = (24 + 1) * 3600 def run(self, current_task: Task) -> None: domain = current_task.get_payload(TaskType.DOMAIN) @@ -26,27 +30,20 @@ def run(self, current_task: Task) -> None: status = TaskStatus.OK status_reason = None if is_main_domain(domain): - try: - domain_data = self._query_whois(domain=domain) - except WhoisQuotaExceeded: - time.sleep(24 * 60 * 60) - domain_data = self._query_whois(domain=domain) + domain_data = perform_whois_or_sleep(domain=domain, logger=self.log) - expiry_date = domain_data.expiration_date - result = self._prepare_expiration_data(expiration_date=expiry_date, result=result) + if domain_data: + expiry_date = domain_data.expiration_date + result = self._prepare_expiration_data(expiration_date=expiry_date, result=result) - if "close_expiration_date" in result: - status = TaskStatus.INTERESTING - status_reason = self._prepare_expiration_status_reason( - days_to_expire=result["days_to_expire"], expiration_date=result["expiration_date"] - ) + if "close_expiration_date" in result: + status = TaskStatus.INTERESTING + status_reason = self._prepare_expiration_status_reason( + days_to_expire=result["days_to_expire"], expiration_date=result["expiration_date"] + ) self.db.save_task_result(task=current_task, status=status, status_reason=status_reason, data=result) - @staticmethod - def _query_whois(domain: str) -> Domain: - return query(domain) - @staticmethod def _prepare_expiration_data( expiration_date: Optional[datetime.datetime], result: Dict[str, Any] @@ -64,9 +61,9 @@ def _prepare_expiration_data( @staticmethod def _prepare_expiration_status_reason(days_to_expire: int, expiration_date: datetime.datetime) -> str: return ( - f"Scanned domain will expire in {days_to_expire} days - (on {expiration_date})." + f"Scanned domain will expire in {days_to_expire} days - on {expiration_date}." if days_to_expire != 1 - else f"Scanned domain will expire in {days_to_expire} day - (on {expiration_date})." + else f"Scanned domain will expire in {days_to_expire} day - on {expiration_date}." ) diff --git a/artemis/reporting/modules/domain_expiration_scanner/template_close_domain_expiration_scanner.jinja2 b/artemis/reporting/modules/domain_expiration_scanner/template_close_domain_expiration_scanner.jinja2 index ac5ed0a38..86a30d8aa 100644 --- a/artemis/reporting/modules/domain_expiration_scanner/template_close_domain_expiration_scanner.jinja2 +++ b/artemis/reporting/modules/domain_expiration_scanner/template_close_domain_expiration_scanner.jinja2 @@ -4,7 +4,7 @@ {% for report in data.reports %} {% if report.report_type == "close_domain_expiration_date" %}
{{ report.target }} - {% trans %}will expire on{% endtrans %} {{ report.additional_data["expiration_date"] }}
+{{ report.target }} - {% trans %}will expire on{% endtrans %} {{ report.additional_data["expiration_date"] }}.