Skip to content

Commit

Permalink
ENH: Add attribute mapping
Browse files Browse the repository at this point in the history
The bot can now construct an event that is much better aligned with custom
needs: it allows setting comments and selecting just a subset of
fields to export
  • Loading branch information
kamil-certat committed Jul 4, 2024
1 parent 6461efa commit f2b8b53
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
- `intelmq.bots.outputs.misp.output_feed`:
- Handle failures if saved current event wasn't saved or is incorrect (PR by Kamil Mankowski).
- Allow saving messages in bulks instead of refreshing the feed immediately (PR#2505 by Kamil Mankowski).
- Add `attribute_mapping` parameter to allow selecting a subset of event attributes as well as additional attribute parameters (PR by Kamil Mankowski).
- `intelmq.bots.outputs.smtp_batch.output`: Documentation on multiple recipients added (PR#2501 by Edvard Rejthar).

### Documentation
Expand Down
25 changes: 25 additions & 0 deletions docs/user/bots.md
Original file line number Diff line number Diff line change
Expand Up @@ -4602,6 +4602,31 @@ incoming messages until the given number of them. Use it if your bot proceeds a
and constant saving to the disk is a problem. Reloading or restarting bot as well as generating
a new MISP event based on `interval_event` triggers saving regardless of the cache size.

**`attribute_mapping`**

(optional, dict) If set, allows selecting which IntelMQ event fields are mapped to MISP attributes
as well as attribute parameters (e.g. a comment). The expected format is a *dictionary of dictionaries*:
each first-level key represents an IntelMQ field that will be directly translated to a MISP attribute; the nested
dictionary represents additional parameters PyMISP can take when creating an attribute. These parameters can use
names of other IntelMQ fields (then the value of such a field will be used), or static values. If no additional
parameters are needed, leave an empty dict.

For example:

```yaml
attribute_mapping:
  source.ip: {}
  feed.name:
    comment: event_description.text
  destination.ip:
    to_ids: False
```

would create a MISP object with three attributes `source.ip`, `feed.name` and `destination.ip`
and set their values as in the IntelMQ event. In addition, the `feed.name` would have a comment
as given in the `event_description.text` from IntelMQ event, and `destination.ip` would be set
as not usable for IDS.

**Usage in MISP**

Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server.
Expand Down
78 changes: 66 additions & 12 deletions intelmq/bots/outputs/misp/output_feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
from pathlib import Path
from uuid import uuid4

import pymisp

from intelmq.lib.bot import OutputBot
from intelmq.lib.exceptions import MissingDependencyError
from ....lib.message import Message, MessageFactory
from intelmq.lib.mixins import CacheMixin
from intelmq.lib.utils import parse_relative

Expand All @@ -30,8 +33,11 @@ class MISPFeedOutputBot(OutputBot, CacheMixin):
bulk_save_count: int = None
misp_org_name = None
misp_org_uuid = None
output_dir: str = "/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path
output_dir: str = (
"/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path
)
_is_multithreadable: bool = False
attribute_mapping: dict = None

@staticmethod
def check_output_dir(dirname):
Expand All @@ -56,11 +62,13 @@ def init(self):
if self.interval_event is None:
self.timedelta = datetime.timedelta(hours=1)
else:
self.timedelta = datetime.timedelta(minutes=parse_relative(self.interval_event))
self.timedelta = datetime.timedelta(
minutes=parse_relative(self.interval_event)
)

if (self.output_dir / '.current').exists():
if (self.output_dir / ".current").exists():
try:
with (self.output_dir / '.current').open() as f:
with (self.output_dir / ".current").open() as f:
self.current_file = Path(f.read())

if self.current_file.exists():
Expand Down Expand Up @@ -127,12 +135,49 @@ def process(self):

def _add_message_to_feed(self, message: dict):
    """Append one message to the current MISP event as an ``intelmq_event`` object.

    The new object is filled by ``_custom_mapping`` when ``attribute_mapping``
    is set to a non-empty value, and by ``_default_mapping`` otherwise.
    """
    misp_object = self.current_event.add_object(name="intelmq_event")
    if self.attribute_mapping:
        self._custom_mapping(misp_object, message)
        return
    self._default_mapping(misp_object, message)

def _default_mapping(self, obj: "pymisp.MISPObject", message: dict):
    """Copy every field of ``message`` onto ``obj`` as a MISP attribute.

    Each key of ``message`` is used as the attribute's object relation and the
    corresponding value as the attribute value. When adding an attribute raises
    ``NewAttributeError``, the field is skipped and a warning is logged, so a
    single unsupported field does not abort the whole message.

    Parameters:
        obj: MISP object that receives the attributes.
        message: mapping of field name to value.
    """
    for object_relation, value in message.items():
        try:
            obj.add_attribute(object_relation, value=value)
        except NewAttributeError:
            # The relation is unknown to the MISP object definition: skip the
            # field, but leave a trace in the log instead of dropping it silently.
            self.logger.warning(
                "Object relation %s not exists in MISP definition, ignoring",
                object_relation,
            )

def _extract_misp_attribute_kwargs(self, message: dict, definition: dict) -> dict:
    """Build the extra keyword arguments for one MISP attribute.

    Parameters:
        message: the message as a plain dict (the form used for caching and
            for the default mapping).
        definition: mapping of attribute parameter name to either the name of
            an event field or a static value. ``None`` or an empty dict means
            "no extra parameters".

    Returns:
        A dict of parameter name to resolved value. A string value that names
        an event field (or whose first dot-separated part does) is replaced by
        that field's value from the message; any other value is passed through
        unchanged.
    """
    # A mapping entry without parameters (empty dict, or a bare YAML key that
    # loads as None) needs no work - and no message parsing.
    if not definition:
        return {}

    # For caching and default mapping, the serialized version is the right format to work on.
    # However, for any custom mapping the Message object is more suitable as it handles
    # subfields.
    event = MessageFactory.from_dict(
        message, harmonization=self.harmonization, default_type="Event"
    )
    known_fields = self.harmonization["event"]

    result = {}
    for parameter, value in definition.items():
        # Check if the value is a harmonization key or a static value
        is_field_reference = isinstance(value, str) and (
            value in known_fields or value.split(".", 1)[0] in known_fields
        )
        result[parameter] = event.get(value) if is_field_reference else value
    return result

def _custom_mapping(self, obj: "pymisp.MISPObject", message: dict):
    """Fill ``obj`` using only the fields listed in ``attribute_mapping``.

    For every configured field that is present in ``message`` one attribute is
    added, with the field value as the attribute value and any additional
    parameters resolved by ``_extract_misp_attribute_kwargs``. Fields that the
    message does not contain are skipped, because not every event carries
    every field.

    Parameters:
        obj: MISP object that receives the attributes.
        message: mapping of field name to value.
    """
    for object_relation, definition in self.attribute_mapping.items():
        if object_relation not in message:
            # Nothing to export for this field in this particular event.
            continue
        # A bare key in the YAML configuration is loaded as None; treat it
        # like an empty parameter dict.
        extra_kwargs = self._extract_misp_attribute_kwargs(message, definition or {})
        # In case of manual mapping, we want to fail if it produces incorrect values,
        # so errors raised by add_attribute are deliberately not caught here.
        obj.add_attribute(
            object_relation,
            value=message[object_relation],
            **extra_kwargs,
        )

def _generate_feed(self, message: dict = None):
if message:
Expand All @@ -151,18 +196,27 @@ def _generate_feed(self, message: dict = None):

@staticmethod
def check(parameters):
    """Validate the bot parameters related to ``output_dir``.

    Parameters:
        parameters: mapping of the configured bot parameters.

    Returns:
        A list holding one ``[level, message]`` pair when there is something to
        report: an ``"error"`` if ``output_dir`` is missing or
        ``check_output_dir`` raised ``OSError``, an ``"info"`` if
        ``check_output_dir`` returned a truthy value (the directory had to be
        created). ``None`` otherwise.
    """
    if "output_dir" not in parameters:
        return [["error", "Parameter 'output_dir' not given."]]

    output_dir = parameters["output_dir"]
    try:
        created = MISPFeedOutputBot.check_output_dir(output_dir)
    except OSError:
        return [
            [
                "error",
                "Directory %r of parameter 'output_dir' does not exist and could not be created."
                % output_dir,
            ]
        ]

    if created:
        return [
            [
                "info",
                "Directory %r of parameter 'output_dir' did not exist, but has now been created."
                % output_dir,
            ]
        ]
    return None


BOT = MISPFeedOutputBot
47 changes: 46 additions & 1 deletion intelmq/tests/bots/outputs/misp/test_output_feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pathlib import Path
from tempfile import TemporaryDirectory

from .....lib.message import Message, MessageFactory
import intelmq.lib.test as test
from intelmq.bots.outputs.misp.output_feed import MISPFeedOutputBot

Expand Down Expand Up @@ -92,14 +93,58 @@ def test_accumulating_events(self):

# Simulating leftovers in the queue when it's time to generate new event
Path(f"{self.directory.name}/.current").unlink()
self.bot.cache_put(EXAMPLE_EVENT)
self.bot.cache_put(MessageFactory.from_dict(EXAMPLE_EVENT).to_dict(jsondict_as_string=True))
self.run_bot(parameters={"bulk_save_count": 3})

new_event = open(f"{self.directory.name}/.current").read()
with open(new_event) as f:
objects = json.load(f)["Event"]["Object"]
assert len(objects) == 1

def test_attribute_mapping(self):
    """Check that ``attribute_mapping`` selects the exported attributes and applies extra parameters."""
    self.run_bot(
        parameters={
            "attribute_mapping": {
                "source.ip": {},
                "feed.name": {"comment": "event_description.text"},
                "destination.ip": {"to_ids": False},
                "malware.name": {"comment": "extra.non_ascii"},
            }
        }
    )

    # The ``.current`` file holds the path of the event file; read it without
    # leaving a file handle open.
    current_event = Path(f"{self.directory.name}/.current").read_text()
    with open(current_event) as f:
        objects = json.load(f).get("Event", {}).get("Object", [])

    assert len(objects) == 1
    attributes = objects[0].get("Attribute")
    assert len(attributes) == 4

    # Index the attributes once; a missing relation fails the test with a KeyError.
    by_relation = {attr.get("object_relation"): attr for attr in attributes}

    source_ip = by_relation["source.ip"]
    assert source_ip["value"] == "152.166.119.2"
    assert source_ip["comment"] == ""

    feed_name = by_relation["feed.name"]
    assert feed_name["value"] == EXAMPLE_EVENT["feed.name"]
    assert feed_name["comment"] == EXAMPLE_EVENT["event_description.text"]

    destination_ip = by_relation["destination.ip"]
    assert destination_ip["value"] == EXAMPLE_EVENT["destination.ip"]
    assert destination_ip["to_ids"] is False

    malware_name = by_relation["malware.name"]
    assert malware_name["value"] == EXAMPLE_EVENT["malware.name"]
    assert malware_name["comment"] == EXAMPLE_EVENT["extra.non_ascii"]


def tearDown(self):
self.cache.delete(self.bot_id)
self.directory.cleanup()
Expand Down

0 comments on commit f2b8b53

Please sign in to comment.