From 08ee90f22518be5701a4c9cf88c9d2e89c920225 Mon Sep 17 00:00:00 2001 From: kazet Date: Tue, 26 Mar 2024 13:47:31 +0100 Subject: [PATCH] Cleaning Artemis API (#860) --- .github/workflows/test.yml | 2 +- artemis/api.py | 92 +++++++++------- artemis/config.py | 4 + artemis/db.py | 32 ++++-- artemis/db_migration.py | 3 + artemis/frontend.py | 19 +++- artemis/main.py | 9 +- artemis/reporting/export/common.py | 2 +- artemis/reporting/export/db.py | 17 +-- artemis/reporting/export/export_data.py | 4 +- artemis/reporting/export/hook.py | 2 +- artemis/reporting/export/hooks.py | 4 +- artemis/reporting/export/main.py | 60 +++++++---- artemis/reporting/export/stats.py | 12 ++- artemis/templating.py | 2 +- artemis/utils.py | 17 ++- docker-compose.test-e2e.yaml | 33 +++++- docker-compose.test.yaml | 14 ++- docker/karton-test.ini | 12 +++ docs/generating-reports.rst | 8 +- docs/user-guide/troubleshooting.rst | 1 + env.test | 3 + scripts/{export_emails => export_reports} | 2 +- scripts/test | 2 +- .../components/generating_reports_hint.jinja2 | 5 + templates/components/navbar.jinja2 | 3 + templates/index.jinja2 | 1 + templates/no_api_token.jinja2 | 8 ++ templates/task_list.jinja2 | 6 +- test/e2e/base.py | 18 ++++ test/e2e/test_automated_interaction.py | 100 ++++++++++++++++++ 31 files changed, 381 insertions(+), 116 deletions(-) create mode 100644 docker/karton-test.ini create mode 100644 env.test rename scripts/{export_emails => export_reports} (96%) create mode 100644 templates/components/generating_reports_hint.jinja2 create mode 100644 templates/no_api_token.jinja2 create mode 100644 test/e2e/test_automated_interaction.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8eeb4cdeb..cc799ae86 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,7 +6,7 @@ on: jobs: test: runs-on: ubuntu-latest - timeout-minutes: 30 + timeout-minutes: 40 steps: - name: Check out repository uses: actions/checkout@v2 diff --git a/artemis/api.py b/artemis/api.py index f1d8cd25d..9dd8c9fdc 100644 --- a/artemis/api.py +++ b/artemis/api.py @@ -1,31 +1,55 @@ -from typing import Any, Dict, List, Optional +from typing import Annotated, Any, Dict, List, Optional -from fastapi import APIRouter, HTTPException, Query, Request +from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query, Request from karton.core.backend import KartonBackend from karton.core.config import Config as KartonConfig from karton.core.inspect import KartonState +from artemis.config import Config from artemis.db import DB, ColumnOrdering, TaskFilter +from artemis.modules.classifier import Classifier +from artemis.producer import create_tasks from artemis.templating import render_analyses_table_row, render_task_table_row router = APIRouter() db = DB() -@router.get("/task/{task_id}") -def get_task(task_id: str) -> Dict[str, Any]: - if result := db.get_task_by_id(task_id): - return result - raise HTTPException(status_code=404, detail="Task not found") +def verify_api_token(x_api_token: Annotated[str, Header()]) -> None: + if not Config.Miscellaneous.API_TOKEN: + raise HTTPException( + status_code=401, + detail="Please set the API_TOKEN variable in .env in order to use the API", + ) + elif x_api_token != Config.Miscellaneous.API_TOKEN: + raise HTTPException(status_code=401, detail="Invalid API token") + + +@router.post("/add", dependencies=[Depends(verify_api_token)]) +def add( + targets: List[str], + tag: Annotated[Optional[str], Body()] = None, + disabled_modules: List[str] = 
Config.Miscellaneous.MODULES_DISABLED_BY_DEFAULT, +) -> Dict[str, Any]: + """Add targets to be scanned.""" + for task in targets: + if not Classifier.is_supported(task): + return {"error": f"Invalid task: {task}"} + + create_tasks(targets, tag, disabled_modules=disabled_modules) + return {"ok": True} -@router.get("/analysis") + +@router.get("/analyses", dependencies=[Depends(verify_api_token)]) def list_analysis() -> List[Dict[str, Any]]: + """Return the list of analysed targets. Any scanned target will be listed here.""" return db.list_analysis() -@router.get("/num-queued-tasks") -def num_queued_tasks(karton_names: Optional[List[str]] = Query(default=None)) -> int: +@router.get("/num-queued-tasks", dependencies=[Depends(verify_api_token)]) +def num_queued_tasks(karton_names: Optional[List[str]] = None) -> int: + """Return the number of queued tasks for all or only some kartons.""" # We check the backend redis queue length directly to avoid the long runtimes of # KartonState.get_all_tasks() backend = KartonBackend(config=KartonConfig()) @@ -39,14 +63,25 @@ def num_queued_tasks(karton_names: Optional[List[str]] = Query(default=None)) -> return sum([backend.redis.llen(key) for key in backend.redis.keys("karton.queue.*")]) -@router.get("/analysis/{root_id}") -def get_analysis(root_id: str) -> Dict[str, Any]: - if result := db.get_analysis_by_id(root_id): - return result - raise HTTPException(status_code=404, detail="Analysis not found") - - -@router.get("/analyses-table") +@router.get("/task-results", dependencies=[Depends(verify_api_token)]) +def get_task_results( + only_interesting: bool = False, + page: int = 1, + page_size: int = 100, + analysis_id: Optional[str] = None, + search: Optional[str] = None, +) -> List[Dict[str, Any]]: + return db.get_paginated_task_results( + start=(page - 1) * page_size, + length=page_size, + ordering=[ColumnOrdering(column_name="created_at", ascending=True)], + search_query=search, + analysis_id=analysis_id, + task_filter=TaskFilter.INTERESTING if only_interesting else None, + ).data + + +@router.get("/analyses-table", include_in_schema=False) def get_analyses_table( request: Request, draw: int = Query(), @@ -71,8 +106,7 @@ def get_analyses_table( { "id": entry["id"], "tag": entry["tag"], - "payload": entry["task"]["payload"], - "payload_persistent": entry["task"]["payload_persistent"], + "target": entry["target"], "num_active_tasks": num_active_tasks, "stopped": entry.get("stopped", None), } @@ -86,7 +120,7 @@ def get_analyses_table( } -@router.get("/task-results-table") +@router.get("/task-results-table", include_in_schema=False) def get_task_results_table( request: Request, analysis_id: Optional[str] = Query(default=None), @@ -101,19 +135,6 @@ def get_task_results_table( ) search_query = _get_search_query(request) - fields = [ - "created_at", - "target_string", - "headers", - "payload_persistent", - "status", - "status_reason", - "priority", - "uid", - "decision_type", - "operator_comment", - ] - if analysis_id: if not db.get_analysis_by_id(analysis_id): raise HTTPException(status_code=404, detail="Analysis not found") @@ -121,14 +142,13 @@ def get_task_results_table( start, length, ordering, - fields=fields, search_query=search_query, analysis_id=analysis_id, task_filter=task_filter, ) else: result = db.get_paginated_task_results( - start, length, ordering, fields=fields, search_query=search_query, task_filter=task_filter + start, length, ordering, search_query=search_query, task_filter=task_filter ) return {
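
For illustration, here is a minimal client-side sketch of the new token-protected endpoints. It is not part of the patch: the base URL and token value are assumptions for a local instance, while the /api prefix, the X-API-Token header, and the endpoint signatures come from the code above and the e2e test at the end of this patch.

    import requests

    BASE_URL = "http://127.0.0.1:5000"      # assumed local Artemis instance
    HEADERS = {"X-API-Token": "api-token"}  # must match API_TOKEN in .env

    # Queue two targets for scanning; the tag makes results easy to filter later.
    response = requests.post(
        f"{BASE_URL}/api/add",
        json={"targets": ["example.com", "192.0.2.1"], "tag": "my-scan"},
        headers=HEADERS,
    )
    print(response.json())  # {"ok": True} on success

    # Page through interesting findings only.
    findings = requests.get(
        f"{BASE_URL}/api/task-results",
        params={"only_interesting": True, "page": 1, "page_size": 100},
        headers=HEADERS,
    ).json()

diff --git a/artemis/config.py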
b/artemis/config.py index 31e9097e2..d7e9f4272 100644 --- a/artemis/config.py +++ b/artemis/config.py @@ -154,6 +154,10 @@ class Limits: ] = get_config("REQUESTS_PER_SECOND", default=0, cast=float) class Miscellaneous: + API_TOKEN: Annotated[str, "The token to authenticate to the API. Provide one to use the API."] = get_config( + "API_TOKEN", default=None + ) + BLOCKLIST_FILE: Annotated[ str, "A file that determines what should not be scanned or reported", diff --git a/artemis/db.py b/artemis/db.py index da3faf9ce..54519f514 100644 --- a/artemis/db.py +++ b/artemis/db.py @@ -75,7 +75,6 @@ class Analysis(Base): # type: ignore target = Column(String, index=True) tag = Column(String, index=True) stopped = Column(Boolean, index=True) - task = Column(JSON) fulltext = Column( TSVector(), @@ -153,7 +152,7 @@ def __init__(self) -> None: def list_analysis(self) -> List[Dict[str, Any]]: with self.session() as session: - return [item.__dict__ for item in session.query(Analysis).all()] + return [self._strip_internal_db_info(item.__dict__) for item in session.query(Analysis).all()] def mark_analysis_as_stopped(self, analysis_id: str) -> None: with self.session() as session: @@ -164,16 +163,12 @@ def mark_analysis_as_stopped(self, analysis_id: str) -> None: def create_analysis(self, analysis: Task) -> None: analysis_dict = self.task_to_dict(analysis) - del analysis_dict["status"] - if "status_reason" in analysis_dict: - del analysis_dict["status_reason"] analysis = Analysis( id=analysis.uid, target=analysis_dict["payload"]["data"], tag=analysis_dict["payload_persistent"].get("tag", None), stopped=False, - task=analysis_dict, ) with self.session() as session: session.add(analysis) @@ -194,6 +189,9 @@ def save_task_result( # Used to allow searching in the names and values of all existing headers headers_string=" ".join([key + " " + value for key, value in task.headers.items()]), ) + + del to_save["task"]["status"] # at the moment of saving it's "started", which will be misleading + if isinstance(data, BaseModel): to_save["result"] = data.dict() elif isinstance(data, Exception): @@ -215,7 +213,7 @@ def get_analysis_by_id(self, analysis_id: str) -> Optional[Dict[str, Any]]: item = session.query(Analysis).get(analysis_id) if item: - return item.__dict__ # type: ignore + return self._strip_internal_db_info(item.__dict__) else: return None except NoResultFound: @@ -247,7 +245,10 @@ def get_paginated_analyses( query = query.filter(Analysis.fulltext.match(self._to_postgresql_query(search_query))) # type: ignore records_count_filtered: int = query.count() - results_page = [item.__dict__ for item in query.order_by(*ordering_postgresql).slice(start, start + length)] + results_page = [ + self._strip_internal_db_info(item.__dict__) + for item in query.order_by(*ordering_postgresql).slice(start, start + length) + ] return PaginatedResults( records_count_total=records_count_total, records_count_filtered=records_count_filtered, @@ -259,7 +260,6 @@ def get_paginated_task_results( start: int, length: int, ordering: List[ColumnOrdering], - fields: List[str], *, search_query: Optional[str] = None, analysis_id: Optional[str] = None, @@ -290,7 +290,10 @@ def get_paginated_task_results( query = query.filter(getattr(TaskResult, key) == value) records_count_filtered = query.count() - results_page = [item.__dict__ for item in query.order_by(*ordering_postgresql).slice(start, start + length)] + results_page = [ + self._strip_internal_db_info(item.__dict__) + for item in query.order_by(*ordering_postgresql).slice(start, start + 
length) + ] return PaginatedResults( records_count_total=records_count_total, records_count_filtered=records_count_filtered, @@ -303,7 +306,7 @@ def get_task_by_id(self, task_id: str) -> Optional[Dict[str, Any]]: item = session.query(TaskResult).get(task_id) if item: - return item.__dict__ # type: ignore + return self._strip_internal_db_info(item.__dict__) else: return None except NoResultFound: @@ -393,3 +396,10 @@ def _to_postgresql_query(self, query: str) -> str: query = query.replace("\\", " ") # just in case query = query.replace('"', " ") # just in case return " & ".join([f'"{item}"' for item in query.split(" ") if item]) + + def _strip_internal_db_info(self, d: Dict[str, Any]) -> Dict[str, Any]: + del d["_sa_instance_state"] + del d["fulltext"] + if "headers_string" in d: + del d["headers_string"] + return d diff --git a/artemis/db_migration.py b/artemis/db_migration.py index 9809283be..848a7d123 100644 --- a/artemis/db_migration.py +++ b/artemis/db_migration.py @@ -114,6 +114,9 @@ def _single_migration_iteration() -> None: def migrate_and_start_thread() -> None: + if not Config.Data.LEGACY_MONGODB_CONN_STR: + return + client = MongoClient(Config.Data.LEGACY_MONGODB_CONN_STR) client.artemis.task_results.create_index([("migrated", ASCENDING)]) client.artemis.analysis.create_index([("migrated", ASCENDING)]) diff --git a/artemis/frontend.py b/artemis/frontend.py index 53f545be9..0fd75c7dd 100644 --- a/artemis/frontend.py +++ b/artemis/frontend.py @@ -13,7 +13,7 @@ Request, Response, ) -from fastapi.responses import RedirectResponse +from fastapi.responses import JSONResponse, RedirectResponse from fastapi_csrf_protect import CsrfProtect from karton.core.backend import KartonBackend, KartonBind from karton.core.config import Config as KartonConfig @@ -74,7 +74,22 @@ def get_binds_that_can_be_disabled() -> List[KartonBind]: def error_content_not_found(request: Request, exc: HTTPException) -> Response: - return templates.TemplateResponse("not_found.jinja2", {"request": request}, status_code=404) + if request.url.path.startswith("/api"): + return JSONResponse({"error": 404}, status_code=404) + else: + return templates.TemplateResponse("not_found.jinja2", {"request": request}, status_code=404) + + +if not Config.Miscellaneous.API_TOKEN: + + @router.get("/docs", include_in_schema=False) + def api_docs_information(request: Request) -> Response: + return templates.TemplateResponse( + "no_api_token.jinja2", + { + "request": request, + }, + ) @router.get("/", include_in_schema=False) diff --git a/artemis/main.py b/artemis/main.py index 358756be8..707b094a6 100644 --- a/artemis/main.py +++ b/artemis/main.py @@ -7,11 +7,18 @@ from artemis import csrf, db_migration from artemis.api import router as router_api +from artemis.config import Config from artemis.db import DB from artemis.frontend import error_content_not_found from artemis.frontend import router as router_front +from artemis.utils import read_template -app = FastAPI() +app = FastAPI( + docs_url="/docs" if Config.Miscellaneous.API_TOKEN else None, + redoc_url=None, + # This will be displayed as the additional text in Swagger docs + description=read_template("components/generating_reports_hint.jinja2"), +) app.exception_handler(CsrfProtectError)(csrf.csrf_protect_exception_handler) app.exception_handler(404)(error_content_not_found) diff --git a/artemis/reporting/export/common.py b/artemis/reporting/export/common.py index da9f3824a..ded29e763 100644 --- a/artemis/reporting/export/common.py +++ b/artemis/reporting/export/common.py @@ -1,5 
+1,5 @@ from pathlib import Path -# This is the output location *inside the container*. The scripts/export_emails +# This is the output location *inside the container*. The scripts/export_reports # script is responsible for mounting a host path to a path inside the container. OUTPUT_LOCATION = Path("./output/autoreporter/") diff --git a/artemis/reporting/export/db.py b/artemis/reporting/export/db.py index 4644fbe1e..07b6d227f 100644 --- a/artemis/reporting/export/db.py +++ b/artemis/reporting/export/db.py @@ -20,12 +20,15 @@ class DataLoader: A wrapper around DB that loads data and converts them to Reports. """ - def __init__(self, db: DB, blocklist: List[BlocklistItem], language: Language, tag: Optional[str]): + def __init__( + self, db: DB, blocklist: List[BlocklistItem], language: Language, tag: Optional[str], silent: bool = False + ): self._db = db self._blocklist = blocklist self._language = language self._tag = tag self._data_initialized = False + self._silent = silent def _initialize_data_if_needed(self) -> None: """ @@ -43,11 +46,13 @@ def _initialize_data_if_needed(self) -> None: self._scanned_targets = set() self._tag_stats: DefaultDict[str, int] = defaultdict(lambda: 0) - for result in tqdm( - self._db.get_task_results_since( - datetime.datetime.now() - datetime.timedelta(days=Config.Reporting.REPORTING_MAX_VULN_AGE_DAYS) - ) - ): + results = self._db.get_task_results_since( + datetime.datetime.now() - datetime.timedelta(days=Config.Reporting.REPORTING_MAX_VULN_AGE_DAYS) + ) + if not self._silent: + results = tqdm(results) # type: ignore + + for result in results: result_tag = result["task"].get("payload_persistent", {}).get("tag", None) self._tag_stats[result_tag] += 1 diff --git a/artemis/reporting/export/export_data.py b/artemis/reporting/export/export_data.py index 13c3a6d22..568106da7 100644 --- a/artemis/reporting/export/export_data.py +++ b/artemis/reporting/export/export_data.py @@ -2,8 +2,6 @@ from dataclasses import dataclass from typing import Dict, List, Optional -from tqdm import tqdm - from artemis.domains import is_domain from artemis.reporting.base.report import Report from artemis.reporting.base.report_type import ReportType @@ -41,7 +39,7 @@ def build_export_data( reports = deduplicate_reports(previous_reports, db.reports) reports_per_top_level_target: Dict[str, List[Report]] = {} - for report in tqdm(reports): + for report in reports: if report.top_level_target not in reports_per_top_level_target: reports_per_top_level_target[report.top_level_target] = [] reports_per_top_level_target[report.top_level_target].append(report) diff --git a/artemis/reporting/export/hook.py b/artemis/reporting/export/hook.py index 8ba206511..da571f866 100644 --- a/artemis/reporting/export/hook.py +++ b/artemis/reporting/export/hook.py @@ -13,5 +13,5 @@ def get_ordering() -> int: @staticmethod @abstractmethod - def run(output_dir: Path, export_data: ExportData) -> None: + def run(output_dir: Path, export_data: ExportData, silent: bool) -> None: raise NotImplementedError() diff --git a/artemis/reporting/export/hooks.py b/artemis/reporting/export/hooks.py index 5df762be9..9494bc6eb 100644 --- a/artemis/reporting/export/hooks.py +++ b/artemis/reporting/export/hooks.py @@ -21,7 +21,7 @@ def get_all_hooks() -> List[Type[ExportHook]]: return sorted(ExportHook.__subclasses__(), key=lambda cls: cls.get_ordering()) -def run_export_hooks(output_dir: Path, export_data: ExportData) -> None: +def run_export_hooks(output_dir: Path, export_data: ExportData, silent: bool) -> None: for hook in 
get_all_hooks(): logger.info("Running hook: %s (ordering=%s)", hook.__name__, hook.get_ordering()) - hook.run(output_dir, export_data) + hook.run(output_dir, export_data, silent) diff --git a/artemis/reporting/export/main.py b/artemis/reporting/export/main.py index 7e83e70dc..807503f39 100644 --- a/artemis/reporting/export/main.py +++ b/artemis/reporting/export/main.py @@ -1,6 +1,7 @@ import dataclasses import datetime import json +import logging import os from pathlib import Path from typing import Optional @@ -28,6 +29,7 @@ from artemis.reporting.export.previous_reports import load_previous_reports from artemis.reporting.export.stats import print_and_save_stats from artemis.reporting.export.translations import install_translations +from artemis.utils import CONSOLE_LOG_HANDLER environment = Environment( loader=BaseLoader(), extensions=["jinja2.ext.i18n"], undefined=StrictUndefined, trim_blocks=True, lstrip_blocks=True @@ -37,7 +39,7 @@ HOST_ROOT_PATH = "/host-root/" -def _build_message_template_and_print_path(output_dir: Path) -> Template: +def _build_message_template_and_print_path(output_dir: Path, silent: bool) -> Template: output_message_template_file_name = output_dir / "message_template.jinja2" message_template_content = build_message_template() @@ -46,29 +48,34 @@ def _build_message_template_and_print_path(output_dir: Path) -> Template: with open(output_message_template_file_name, "w") as f: f.write(message_template_content) - print(f"Message template written to file: {output_message_template_file_name}") + if not silent: + print(f"Message template written to file: {output_message_template_file_name}") return message_template -def _install_translations_and_print_path(language: Language, output_dir: Path) -> None: +def _install_translations_and_print_path(language: Language, output_dir: Path, silent: bool) -> None: translations_file_name = output_dir / "translations.po" compiled_translations_file_name = output_dir / "compiled_translations.mo" install_translations(language, environment, translations_file_name, compiled_translations_file_name) - print(f"Translations written to file: {translations_file_name}") - print(f"Compiled translations written to file: {compiled_translations_file_name}") + if not silent: + print(f"Translations written to file: {translations_file_name}") + print(f"Compiled translations written to file: {compiled_translations_file_name}") -def _dump_export_data_and_print_path(export_data: ExportData, output_dir: Path) -> None: +def _dump_export_data_and_print_path(export_data: ExportData, output_dir: Path, silent: bool) -> None: output_json_file_name = output_dir / "output.json" with open(output_json_file_name, "w") as f: json.dump(export_data, f, indent=4, cls=JSONEncoderAdditionalTypes) - print(f"JSON written to file: {output_json_file_name}") + if not silent: + print(f"JSON written to file: {output_json_file_name}") -def _build_messages_and_print_path(message_template: Template, export_data: ExportData, output_dir: Path) -> None: +def _build_messages_and_print_path( + message_template: Template, export_data: ExportData, output_dir: Path, silent: bool +) -> None: output_messages_directory_name = output_dir / "messages" # We dump and reload the message data to/from JSON before rendering in order to make sure the template @@ -86,8 +93,10 @@ def _build_messages_and_print_path(message_template: Template, export_data: Expo with open(output_messages_directory_name / (top_level_target_shortened + ".html"), "w") as f: f.write(message_template.render({"data": 
export_data_dict["messages"][top_level_target]})) - print() - print(termcolor.colored(f"Messages written to: {output_messages_directory_name}", attrs=["bold"])) + + if not silent: + print() + print(termcolor.colored(f"Messages written to: {output_messages_directory_name}", attrs=["bold"])) def main( @@ -107,12 +116,19 @@ def main( help="Custom template arguments in the form of name1=value1,name2=value2,... - the original templates " "don't need them, but if you modified them on your side, they might.", ), + silent: bool = typer.Option( + False, + "--silent", + help="Print only the resulting folder path", + ), verbose: bool = typer.Option( False, "--verbose", help="Print more information (e.g. whether some types of reports have not been observed for a long time).", ), ) -> None: + if silent: + CONSOLE_LOG_HANDLER.setLevel(level=logging.ERROR) blocklist = load_blocklist(Config.Miscellaneous.BLOCKLIST_FILE) if previous_reports_directory: @@ -122,7 +138,7 @@ def main( custom_template_arguments_parsed = parse_custom_template_arguments(custom_template_arguments) db = DB() - export_db_connector = DataLoader(db, blocklist, language, tag) + export_db_connector = DataLoader(db, blocklist, language, tag, silent) # we strip microseconds so that the timestamp in export_data json and folder name are equal timestamp = datetime.datetime.now().replace(microsecond=0) export_data = build_export_data( @@ -132,16 +148,19 @@ def main( output_dir = OUTPUT_LOCATION / date_str os.mkdir(output_dir) - _install_translations_and_print_path(language, output_dir) + _install_translations_and_print_path(language, output_dir, silent) + + run_export_hooks(output_dir, export_data, silent) - run_export_hooks(output_dir, export_data) + _dump_export_data_and_print_path(export_data, output_dir, silent) + message_template = _build_message_template_and_print_path(output_dir, silent) - _dump_export_data_and_print_path(export_data, output_dir) - message_template = _build_message_template_and_print_path(output_dir) + print_and_save_stats(export_data, output_dir, silent) - print_and_save_stats(export_data, output_dir) + if silent: + print(output_dir) - if verbose: + if verbose and not silent: print_long_unseen_report_types(previous_reports + export_db_connector.reports) print("Available tags (and the counts of raw task results - not to be confused with vulnerabilities):") @@ -151,10 +170,11 @@ def main( for tag in sorted([key for key in export_db_connector.tag_stats.keys() if key]): print(f"\t{tag}: {export_db_connector.tag_stats[tag]}") - _build_messages_and_print_path(message_template, export_data, output_dir) + _build_messages_and_print_path(message_template, export_data, output_dir, silent) - for alert in export_data.alerts: - print(termcolor.colored("ALERT:" + alert, color="red")) + if not silent: + for alert in export_data.alerts: + print(termcolor.colored("ALERT:" + alert, color="red")) if __name__ == "__main__": diff --git a/artemis/reporting/export/stats.py b/artemis/reporting/export/stats.py index 66340a402..9c4384497 100644 --- a/artemis/reporting/export/stats.py +++ b/artemis/reporting/export/stats.py @@ -5,7 +5,7 @@ from artemis.reporting.export.export_data import ExportData -def print_and_save_stats(export_data: ExportData, output_dir: Path) -> None: +def print_and_save_stats(export_data: ExportData, output_dir: Path, silent: bool) -> None: num_reports_per_type: Counter[ReportType] = Counter() for _, data in export_data.messages.items(): @@ -20,7 +20,9 @@ def print_and_save_stats(export_data: ExportData, output_dir: 
Path) -> None: sorted([(count, report_type) for report_type, count in num_reports_per_type.items()]) ): f.write(f"Num reports of type {report_type}: {count}\n") - print(f"Stats (written to file: {output_stats_file_name}):") - with open(output_stats_file_name, "r") as f: - for line in f: - print("\t" + line.strip()) + + if not silent: + print(f"Stats (written to file: {output_stats_file_name}):") + with open(output_stats_file_name, "r") as f: + for line in f: + print("\t" + line.strip()) diff --git a/artemis/templating.py b/artemis/templating.py index c9019afb0..61b317d19 100644 --- a/artemis/templating.py +++ b/artemis/templating.py @@ -41,7 +41,7 @@ def render_task_table_row(task_result: Dict[str, Any]) -> List[str]: def render_analyses_table_row(entry: Dict[str, Any]) -> List[str]: return [ - html.escape(entry["payload"]["data"]), + html.escape(entry["target"]), html.escape(entry["tag"] or ""), TEMPLATE_ANALYSIS_TABLE_ROW_PENDING_TASKS.render({"entry": entry}), TEMPLATE_ANALYSIS_TABLE_ROW_ACTIONS.render({"entry": entry}), diff --git a/artemis/utils.py b/artemis/utils.py index 6f5b47c4c..72ee1e32b 100644 --- a/artemis/utils.py +++ b/artemis/utils.py @@ -3,6 +3,7 @@ import time import urllib.parse from ipaddress import ip_address +from pathlib import Path from typing import Any, Callable, List, Optional from whoisdomain import Domain, WhoisQuotaExceeded # type: ignore @@ -10,6 +11,10 @@ from artemis.config import Config +CONSOLE_LOG_HANDLER = logging.StreamHandler() +CONSOLE_LOG_HANDLER.setLevel(logging.INFO) +CONSOLE_LOG_HANDLER.setFormatter(logging.Formatter(Config.Miscellaneous.LOGGING_FORMAT_STRING)) + class CalledProcessErrorWithMessage(subprocess.CalledProcessError): def __init__(self, message: str, returncode: int, cmd: List[str], output: bytes, stderr: bytes): @@ -64,10 +69,9 @@ def perform_whois_or_sleep(domain: str, logger: logging.Logger) -> Optional[Doma def build_logger(name: str) -> logging.Logger: logger = logging.getLogger(name) logger.setLevel(logging.INFO) - handler = logging.StreamHandler() - handler.setLevel(logging.INFO) - handler.setFormatter(logging.Formatter(Config.Miscellaneous.LOGGING_FORMAT_STRING)) - logger.addHandler(handler) + for handler in logger.handlers: + logger.removeHandler(handler) + logger.addHandler(CONSOLE_LOG_HANDLER) return logger @@ -111,3 +115,8 @@ def is_ip_address(host: str) -> bool: return True except ValueError: return False + + +def read_template(path: str) -> str: + with open(Path(__file__).parent.parent / "templates" / path) as f: + return f.read() diff --git a/docker-compose.test-e2e.yaml b/docker-compose.test-e2e.yaml index f7ff68c2e..36e050a28 100644 --- a/docker-compose.test-e2e.yaml +++ b/docker-compose.test-e2e.yaml @@ -1,9 +1,36 @@ services: - karton-port_scanner: + web: + env_file: env.test environment: - SCANNING_PACKETS_PER_SECOND: 5 - CUSTOM_PORT_SCANNER_PORTS: 21,80,6379 + API_TOKEN: api-token + volumes: + - data-web:/data + - ./docker/karton-test.ini:/etc/karton/karton.ini karton-bruter: environment: REQUESTS_PER_SECOND: 0 + volumes: ["./docker/karton-test.ini:/etc/karton/karton.ini"] + + karton-classifier: + env_file: env.test + volumes: ["./docker/karton-test.ini:/etc/karton/karton.ini"] + + karton-ftp_bruter: + env_file: env.test + volumes: ["./docker/karton-test.ini:/etc/karton/karton.ini"] + + karton-mail_dns_scanner: + env_file: env.test + volumes: ["./docker/karton-test.ini:/etc/karton/karton.ini"] + + karton-port_scanner: + env_file: env.test + environment: + SCANNING_PACKETS_PER_SECOND: 5 + CUSTOM_PORT_SCANNER_PORTS: 
21,80,6379 + volumes: ["./docker/karton-test.ini:/etc/karton/karton.ini"] + + karton-system: + env_file: env.test + volumes: ["./docker/karton-test.ini:/etc/karton/karton.ini"] diff --git a/docker-compose.test.yaml b/docker-compose.test.yaml index 7afdacf62..00c6bc0cf 100644 --- a/docker-compose.test.yaml +++ b/docker-compose.test.yaml @@ -4,16 +4,14 @@ services: build: context: . dockerfile: docker/Dockerfile - environment: - DB_CONN_STR: "" - POSTGRES_CONN_STR: postgresql://postgres:postgres@postgres-test/artemis - REDIS_CONN_STR: redis://test-redis:6379/1 + env_file: env.test command: python -m unittest discover test.e2e volumes: # We don't copy the test code into the image so that we don't pollute production image # with test data. - ./test/:/opt/test/ - ./docker/karton.ini:/etc/karton/karton.ini + - data-test:/data/ test: build: @@ -21,12 +19,8 @@ services: dockerfile: docker/Dockerfile command: bash -c "python -m unittest discover test.modules && python -m unittest discover test.reporting && python -m unittest discover test.unit" environment: - DB_CONN_STR: "" - TEST_REDIS_HOST: test-redis TEST_REDIS_PORT: 6379 - REDIS_CONN_STR: redis://test-redis:6379/1 - POSTGRES_CONN_STR: postgresql://postgres:postgres@postgres-test/artemis # Use only one provider to speed up the tests GAU_ADDITIONAL_OPTIONS: "--providers wayback" @@ -38,6 +32,7 @@ services: SCANNING_PACKETS_PER_SECOND: 5 CUSTOM_PORT_SCANNER_PORTS: 21,80,6379 NUCLEI_CHECK_TEMPLATE_LIST: False + env_file: env.test volumes: # We don't copy the test code into the image so that we don't pollute production image # with test data. @@ -184,3 +179,6 @@ services: image: php:7.4-apache volumes: - ./test/reporting/data/bruteable_files/htpasswd/:/var/www/html/ + +volumes: + data-test: diff --git a/docker/karton-test.ini b/docker/karton-test.ini new file mode 100644 index 000000000..4b90f399e --- /dev/null +++ b/docker/karton-test.ini @@ -0,0 +1,12 @@ +[s3] +# These need to be provided, so let's provide a mock - but we don't want to have a proper +# s3-compatible storage instance, as we don't use this feature. +address=http://s3mock:9090/ +access_key= +secret_key= +bucket=bucket + +[redis] +host=test-redis +port=6379 +db=0 diff --git a/docs/generating-reports.rst b/docs/generating-reports.rst index 38eadea0a..a459c7206 100644 --- a/docs/generating-reports.rst +++ b/docs/generating-reports.rst @@ -2,8 +2,8 @@ Generating reports to be sent ============================= -Artemis can generate HTML reports containing a description of found vulnerabilities. An -example report can be found in the :ref:`generating-reports-example-report` section. +Artemis contains a command-line tool to generate HTML reports containing a description of +found vulnerabilities. An example report can be found in the :ref:`generating-reports-example-report` section. Such reports are sent by CERT PL via e-mail to the scanned entities in our constituency. @@ -15,7 +15,7 @@ To generate such reports, you first need to start Artemis and scan some targets, Then, run the following script in the repository root: -``./scripts/export_emails`` +``./scripts/export_reports`` This script will produce **HTML messages ready to be sent**. @@ -33,7 +33,7 @@ yourself. .. note :: Please keep in mind that the reporting script resolves domains and performs HTTP requests. 
-To view additional options, use ``./scripts/export_emails --help`` - for example, you will be able to change +To view additional options, use ``./scripts/export_reports --help`` - for example, you will be able to change language, filter reports by tag or skip sending messages that have already been sent. Troubleshooting diff --git a/docs/user-guide/troubleshooting.rst b/docs/user-guide/troubleshooting.rst index 8ccf36324..87dce1e7c 100644 --- a/docs/user-guide/troubleshooting.rst +++ b/docs/user-guide/troubleshooting.rst @@ -22,6 +22,7 @@ this setting will fix the problem. To solve this, run: .. code-block:: + git config --global core.autocrlf input This command sets Git to convert line endings to LF on checkout but doesn't convert them when committing files. diff --git a/env.test b/env.test new file mode 100644 index 000000000..ca9f85a96 --- /dev/null +++ b/env.test @@ -0,0 +1,3 @@ +DB_CONN_STR= +REDIS_CONN_STR=redis://test-redis:6379/1 +POSTGRES_CONN_STR=postgresql://postgres:postgres@postgres-test/artemis diff --git a/scripts/export_emails b/scripts/export_reports similarity index 96% rename from scripts/export_emails rename to scripts/export_reports index 721c62fe6..757904e49 100755 --- a/scripts/export_emails +++ b/scripts/export_reports @@ -24,5 +24,5 @@ if exists */docker-compose.additional*; then done fi -docker compose $FILE_OPTIONS $ADDITIONAL_DOCKER_COMPOSE_OPTIONS build autoreporter +docker compose $FILE_OPTIONS $ADDITIONAL_DOCKER_COMPOSE_OPTIONS build --quiet autoreporter docker compose $FILE_OPTIONS $ADDITIONAL_DOCKER_COMPOSE_OPTIONS run autoreporter python3 -m artemis.reporting.export.main "$@" diff --git a/scripts/test b/scripts/test index e194245d7..eb57da628 100755 --- a/scripts/test +++ b/scripts/test @@ -9,7 +9,7 @@ fi docker compose -f docker-compose.test.yaml down --remove-orphans # Let's first start everything except the test container and perform relevant setup -docker compose -f docker-compose.test.yaml up -d --build --scale test=0 +docker compose -f docker-compose.test.yaml up -d --build --scale test=0 --scale=test-e2e=0 # Wait for the dependency services to be available docker compose -f docker-compose.test.yaml run test /wait-for-it.sh test-old-joomla-mysql:3306 diff --git a/templates/components/generating_reports_hint.jinja2 b/templates/components/generating_reports_hint.jinja2 new file mode 100644 index 000000000..80fd60c9d --- /dev/null +++ b/templates/components/generating_reports_hint.jinja2 @@ -0,0 +1,5 @@ +Hint: if, instead of browsing the raw task results, you want to export concise + +HTML reports with few false positives and duplicates, browse to the +generating reports +section of the documentation. diff --git a/templates/components/navbar.jinja2 b/templates/components/navbar.jinja2 index b9f939a15..428e33d94 100644 --- a/templates/components/navbar.jinja2 +++ b/templates/components/navbar.jinja2 @@ -22,6 +22,9 @@ + diff --git a/templates/index.jinja2 b/templates/index.jinja2 index 01e0f6ea2..3a358aa34 100644 --- a/templates/index.jinja2 +++ b/templates/index.jinja2 @@ -1,4 +1,5 @@ {% extends "components/base.jinja2" %} + {% block main %}
Analysed targets
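
Taken together with the /docs override in artemis/frontend.py above, the new template below produces the following behaviour. A small sketch, under the assumption of a local instance started without API_TOKEN set in .env:

    import requests

    # Swagger UI is disabled (docs_url=None in main.py), so /docs serves the
    # explanatory page rendered from no_api_token.jinja2 instead.
    response = requests.get("http://127.0.0.1:5000/docs")
    print(response.status_code)          # 200
    print("API_TOKEN" in response.text)  # True - the page says what to configure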
diff --git a/templates/no_api_token.jinja2 b/templates/no_api_token.jinja2 new file mode 100644 index 000000000..72d84da29 --- /dev/null +++ b/templates/no_api_token.jinja2 @@ -0,0 +1,8 @@ +{% extends "components/base.jinja2" %} + +{% block main %} +No API token + +Set the API_TOKEN variable in the .env file to use the API. + +{% endblock %} diff --git a/templates/task_list.jinja2 b/templates/task_list.jinja2 index 83aff4f6e..95ed6e34a 100644 --- a/templates/task_list.jinja2 +++ b/templates/task_list.jinja2 @@ -28,11 +28,7 @@

- Hint: if, instead of browsing the raw task results, you want to export concise - - HTML reports with few false positives and duplicates, browse to the - generating reports - section of the documentation. + {% include "components/generating_reports_hint.jinja2" %}
{% endblock %} diff --git a/test/e2e/base.py b/test/e2e/base.py index caf577295..c7e5f208f 100644 --- a/test/e2e/base.py +++ b/test/e2e/base.py @@ -5,7 +5,10 @@ import requests from bs4 import BeautifulSoup +from karton.core.backend import KartonBackend +from karton.core.config import Config as KartonConfig +from artemis.db import DB, Analysis, ScheduledTask, TaskResult from artemis.utils import build_logger BACKEND_URL = "http://web:5000/" @@ -25,6 +28,21 @@ def __init__(self, *args, **kwargs): # type: ignore def setUp(self) -> None: self._wait_for_backend() + db = DB() + session = db.session() + session.query(ScheduledTask).delete() + session.query(Analysis).delete() + session.query(TaskResult).delete() + session.commit() + + backend = KartonBackend(config=KartonConfig()) + + for key in backend.redis.keys("karton.task*"): + backend.redis.delete(key) + + for key in backend.redis.keys("karton.queue*"): + backend.redis.delete(key) + def submit_tasks(self, tasks: List[str], tag: str) -> None: with requests.Session() as s: response = s.get(BACKEND_URL + "add") diff --git a/test/e2e/test_automated_interaction.py b/test/e2e/test_automated_interaction.py new file mode 100644 index 000000000..f2b954c72 --- /dev/null +++ b/test/e2e/test_automated_interaction.py @@ -0,0 +1,100 @@ +import time +from test.e2e.base import BACKEND_URL, BaseE2ETestCase + +import requests + +from artemis.frontend import get_binds_that_can_be_disabled + + +class AutomatedInteractionTestCase(BaseE2ETestCase): + def test_api_token_is_required(self) -> None: + self.assertEqual( + requests.post( + BACKEND_URL + "api/add", + {}, + headers={"Content-Type": "application/json", "X-API-Token": "invalid-api-token"}, + ).status_code, + 401, + ) + self.assertEqual( + requests.get(BACKEND_URL + "api/analyses", headers={"X-API-Token": "invalid-api-token"}).status_code, 401 + ) + self.assertEqual( + requests.get( + BACKEND_URL + "api/num-queued-tasks", headers={"X-API-Token": "invalid-api-token"} + ).status_code, + 401, + ) + self.assertEqual( + requests.get(BACKEND_URL + "api/task-results", headers={"X-API-Token": "invalid-api-token"}).status_code, + 401, + ) + + def test_automated_interaction(self) -> None: + self.assertEqual( + requests.post( + BACKEND_URL + "api/add", + json={ + "targets": ["test-smtp-server.artemis"], + "tag": "automated-interaction", + "disabled_modules": [ + bind.identity + for bind in get_binds_that_can_be_disabled() + if bind.identity not in ["mail_dns_scanner", "classifier"] + ], + }, + headers={"X-API-Token": "api-token"}, + ).json(), + {"ok": True}, + ) + + analyses = requests.get(BACKEND_URL + "api/analyses", headers={"X-API-Token": "api-token"}).json() + self.assertEqual(len(analyses), 1) + self.assertEqual(set(analyses[0].keys()), {"stopped", "target", "created_at", "id", "tag"}) + self.assertEqual(analyses[0]["stopped"], False) + self.assertEqual(analyses[0]["target"], "test-smtp-server.artemis") + self.assertEqual(analyses[0]["tag"], "automated-interaction") + + for i in range(100): + num_queued_tasks = int( + requests.get(BACKEND_URL + "api/num-queued-tasks", headers={"X-API-Token": "api-token"}).content.strip() + ) + + if num_queued_tasks == 0: + break + + time.sleep(1) + self.assertEqual(num_queued_tasks, 0) + + task_results = requests.get( + BACKEND_URL + "api/task-results?only_interesting=true", headers={"X-API-Token": "api-token"} + ).json() + self.assertEqual(len(task_results), 1) + self.assertEqual( + set(task_results[0].keys()), + { + "created_at", + "receiver", + "status_reason", + "task", + 
"status", + "analysis_id", + "id", + "tag", + "target_string", + "result", + }, + ) + self.assertEqual(task_results[0]["receiver"], "mail_dns_scanner") + self.assertEqual(task_results[0]["status"], "INTERESTING") + self.assertEqual( + task_results[0]["status_reason"], + "Found problems: Valid DMARC record not found. We recommend using all three mechanisms: SPF, DKIM and DMARC to decrease the possibility of successful e-mail message spoofing.", + ) + self.assertEqual(task_results[0]["tag"], "automated-interaction") + self.assertEqual(task_results[0]["target_string"], "test-smtp-server.artemis") + + task_results = requests.get( + BACKEND_URL + "api/task-results?search=should-not-exist", headers={"X-API-Token": "api-token"} + ).json() + self.assertEqual(len(task_results), 0)