From f2b8b53d7ae921eee7d2e6ea972eb9053570005d Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Thu, 4 Jul 2024 13:32:59 +0200 Subject: [PATCH] ENH: Add attribute mapping The bot can now construct an event much more aligned to custom needs, allowing setting comments and selecting just a subset of fields to export --- CHANGELOG.md | 1 + docs/user/bots.md | 25 ++++++ intelmq/bots/outputs/misp/output_feed.py | 78 ++++++++++++++++--- .../bots/outputs/misp/test_output_feed.py | 47 ++++++++++- 4 files changed, 138 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e49ba2740..fae4dd41c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ - `intelmq.bots.outputs.misp.output_feed`: - Handle failures if saved current event wasn't saved or is incorrect (PR by Kamil Mankowski). - Allow saving messages in bulks instead of refreshing the feed immediately (PR#2505 by Kamil Mankowski). + - Add `attribute_mapping` parameter to allow selecting a subset of event attributes as well as additional attribute parameters (PR by Kamil Mankowski). - `intelmq.bots.outputs.smtp_batch.output`: Documentation on multiple recipients added (PR#2501 by Edvard Rejthar). ### Documentation diff --git a/docs/user/bots.md b/docs/user/bots.md index 17a80395c..3d2d64e4d 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -4602,6 +4602,31 @@ incoming messages until the given number of them. Use it if your bot proceeds a and constant saving to the disk is a problem. Reloading or restarting bot as well as generating a new MISP event based on `interval_event` triggers saving regardless of the cache size. +**`attribute_mapping`** + +(optional, dict) If set, allows selecting which IntelMQ event fields are mapped to MISP attributes +as well as attribute parameters (like e.g. a comment). 
The expected format is a *dictionary of dictionaries*: +first-level key represents an IntelMQ field that will be directly translated to a MISP attribute; nested +dictionary represents additional parameters PyMISP can take when creating an attribute. They can use +names of other IntelMQ fields (then the value of such field will be used), or static values. If not needed, +leave empty dict. + +For example: + +```yaml +attribute_mapping: + source.ip: + feed.name: + comment: event_description.text + destination.ip: + to_ids: False +``` + +would create a MISP object with three attributes `source.ip`, `feed.name` and `destination.ip` +and set their values as in the IntelMQ event. In addition, the `feed.name` would have a comment +as given in the `event_description.text` from IntelMQ event, and `destination.ip` would be set +as not usable for IDS. + **Usage in MISP** Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server. diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index 53c655679..878858cea 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -9,8 +9,11 @@ from pathlib import Path from uuid import uuid4 +import pymisp + from intelmq.lib.bot import OutputBot from intelmq.lib.exceptions import MissingDependencyError +from ....lib.message import Message, MessageFactory from intelmq.lib.mixins import CacheMixin from intelmq.lib.utils import parse_relative @@ -30,8 +33,11 @@ class MISPFeedOutputBot(OutputBot, CacheMixin): bulk_save_count: int = None misp_org_name = None misp_org_uuid = None - output_dir: str = "/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path + output_dir: str = ( + "/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path + ) _is_multithreadable: bool = False + attribute_mapping: dict = None @staticmethod def check_output_dir(dirname): @@ -56,11 +62,13 @@ def 
init(self): if self.interval_event is None: self.timedelta = datetime.timedelta(hours=1) else: - self.timedelta = datetime.timedelta(minutes=parse_relative(self.interval_event)) + self.timedelta = datetime.timedelta( + minutes=parse_relative(self.interval_event) + ) - if (self.output_dir / '.current').exists(): + if (self.output_dir / ".current").exists(): try: - with (self.output_dir / '.current').open() as f: + with (self.output_dir / ".current").open() as f: self.current_file = Path(f.read()) if self.current_file.exists(): @@ -127,12 +135,49 @@ def process(self): def _add_message_to_feed(self, message: dict): obj = self.current_event.add_object(name="intelmq_event") + if not self.attribute_mapping: + self._default_mapping(obj, message) + else: + self._custom_mapping(obj, message) + + def _default_mapping(self, obj: pymisp.MISPObject, message: dict): for object_relation, value in message.items(): try: obj.add_attribute(object_relation, value=value) except NewAttributeError: # This entry isn't listed in the harmonization file, ignoring. - pass + self.logger.warning( + "Object relation %s not exists in MISP definition, ignoring", + object_relation, + ) + + def _extract_misp_attribute_kwargs(self, message: dict, definition: dict) -> dict: + # For caching and default mapping, the serialized version is the right format to work on. + # However, for any custom mapping the Message object is more sufficient as it handles + # subfields. 
+ message = MessageFactory.from_dict( + message, harmonization=self.harmonization, default_type="Event" + ) + result = {} + for parameter, value in definition.items(): + # Check if the value is a harmonization key or a static value + if isinstance(value, str) and ( + value in self.harmonization["event"] + or value.split(".", 1)[0] in self.harmonization["event"] + ): + result[parameter] = message.get(value) + else: + result[parameter] = value + return result + + def _custom_mapping(self, obj: pymisp.MISPObject, message: dict): + for object_relation, definition in self.attribute_mapping.items(): + obj.add_attribute( + object_relation, + value=message[object_relation], + **self._extract_misp_attribute_kwargs(message, definition), + ) + # In case of manual mapping, we want to fail if it produces incorrect values def _generate_feed(self, message: dict = None): if message: @@ -151,18 +196,27 @@ def _generate_feed(self, message: dict = None): @staticmethod def check(parameters): - if 'output_dir' not in parameters: + if "output_dir" not in parameters: return [["error", "Parameter 'output_dir' not given."]] try: - created = MISPFeedOutputBot.check_output_dir(parameters['output_dir']) + created = MISPFeedOutputBot.check_output_dir(parameters["output_dir"]) except OSError: - return [["error", - "Directory %r of parameter 'output_dir' does not exist and could not be created." % parameters['output_dir']]] + return [ + [ + "error", + "Directory %r of parameter 'output_dir' does not exist and could not be created." + % parameters["output_dir"], + ] + ] else: if created: - return [["info", - "Directory %r of parameter 'output_dir' did not exist, but has now been created." - "" % parameters['output_dir']]] + return [ + [ + "info", + "Directory %r of parameter 'output_dir' did not exist, but has now been created." 
+ "" % parameters["output_dir"], + ] + ] BOT = MISPFeedOutputBot diff --git a/intelmq/tests/bots/outputs/misp/test_output_feed.py b/intelmq/tests/bots/outputs/misp/test_output_feed.py index 631b7b7bd..abb4b9c36 100644 --- a/intelmq/tests/bots/outputs/misp/test_output_feed.py +++ b/intelmq/tests/bots/outputs/misp/test_output_feed.py @@ -8,6 +8,7 @@ from pathlib import Path from tempfile import TemporaryDirectory +from .....lib.message import Message, MessageFactory import intelmq.lib.test as test from intelmq.bots.outputs.misp.output_feed import MISPFeedOutputBot @@ -92,7 +93,7 @@ def test_accumulating_events(self): # Simulating leftovers in the queue when it's time to generate new event Path(f"{self.directory.name}/.current").unlink() - self.bot.cache_put(EXAMPLE_EVENT) + self.bot.cache_put(MessageFactory.from_dict(EXAMPLE_EVENT).to_dict(jsondict_as_string=True)) self.run_bot(parameters={"bulk_save_count": 3}) new_event = open(f"{self.directory.name}/.current").read() @@ -100,6 +101,50 @@ def test_accumulating_events(self): objects = json.load(f)["Event"]["Object"] assert len(objects) == 1 + def test_attribute_mapping(self): + self.run_bot( + parameters={ + "attribute_mapping": { + "source.ip": {}, + "feed.name": {"comment": "event_description.text"}, + "destination.ip": {"to_ids": False}, + "malware.name": {"comment": "extra.non_ascii"} + } + } + ) + + current_event = open(f"{self.directory.name}/.current").read() + with open(current_event) as f: + objects = json.load(f).get("Event", {}).get("Object", []) + + assert len(objects) == 1 + attributes = objects[0].get("Attribute") + assert len(attributes) == 4 + source_ip = next( + attr for attr in attributes if attr.get("object_relation") == "source.ip" + ) + assert source_ip["value"] == "152.166.119.2" + assert source_ip["comment"] == "" + + feed_name = next( + attr for attr in attributes if attr.get("object_relation") == "feed.name" + ) + assert feed_name["value"] == EXAMPLE_EVENT["feed.name"] + assert 
feed_name["comment"] == EXAMPLE_EVENT["event_description.text"] + + destination_ip = next( + attr for attr in attributes if attr.get("object_relation") == "destination.ip" + ) + assert destination_ip["value"] == EXAMPLE_EVENT["destination.ip"] + assert destination_ip["to_ids"] is False + + malware_name = next( + attr for attr in attributes if attr.get("object_relation") == "malware.name" + ) + assert malware_name["value"] == EXAMPLE_EVENT["malware.name"] + assert malware_name["comment"] == EXAMPLE_EVENT["extra.non_ascii"] + + def tearDown(self): self.cache.delete(self.bot_id) self.directory.cleanup()