From 3efeaf642e2e0fd299d761d4e818c80ed7da5cd7 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Wed, 16 Oct 2024 16:50:38 +0200 Subject: [PATCH 1/4] add support for ISO 8601 timestamps in syslogs --- dissect/target/plugins/os/unix/log/helpers.py | 44 +++++++++++++++++++ .../target/plugins/os/unix/log/messages.py | 27 ++++++++---- tests/plugins/os/unix/log/test_messages.py | 27 ++++++++++++ 3 files changed, 90 insertions(+), 8 deletions(-) create mode 100644 dissect/target/plugins/os/unix/log/helpers.py diff --git a/dissect/target/plugins/os/unix/log/helpers.py b/dissect/target/plugins/os/unix/log/helpers.py new file mode 100644 index 000000000..6b9e40255 --- /dev/null +++ b/dissect/target/plugins/os/unix/log/helpers.py @@ -0,0 +1,44 @@ +import itertools +import logging +import re +from datetime import datetime +from typing import Iterator + +from dissect.target.helpers.fsutil import TargetPath, open_decompress + +log = logging.getLogger(__name__) + +_RE_TS = r"^[A-Za-z]{3}\s*[0-9]{1,2}\s[0-9]{1,2}:[0-9]{2}:[0-9]{2}" +_RE_TS_ISO = r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+\d{2}:\d{2}" + +RE_TS = re.compile(_RE_TS) +RE_TS_ISO = re.compile(_RE_TS_ISO) +RE_LINE = re.compile( + rf"(?P<ts>{_RE_TS}|{_RE_TS_ISO})\s(?P<hostname>\S+)\s(?P<service>\S+?)(\[(?P<pid>\d+)\])?:\s(?P<message>.+)$" +) + + +def iso_readlines(file: TargetPath) -> Iterator[tuple[datetime, str]]: + """Iterator reading the provided log file in ISO format. 
Mimics ``year_rollover_helper`` behaviour.""" + + with open_decompress(file, "rt") as fh: + for line in fh: + if not (match := RE_TS_ISO.match(line)): + log.warning("No timestamp found in one of the lines in %s!", file) + log.debug("Skipping line: %s", line) + continue + + try: + ts = datetime.strptime(match[0], "%Y-%m-%dT%H:%M:%S.%f%z") + + except ValueError as e: + log.warning("Unable to parse ISO timestamp in line: %s", line) + log.debug("", exc_info=e) + continue + + yield ts, line + + +def is_iso_fmt(file: TargetPath) -> bool: + """Determine if the provided log file uses ISO 8601 timestamp format logging or not.""" + return any(itertools.islice(iso_readlines(file), 0, 2)) diff --git a/dissect/target/plugins/os/unix/log/messages.py b/dissect/target/plugins/os/unix/log/messages.py index 0ba34d785..03302da30 100644 --- a/dissect/target/plugins/os/unix/log/messages.py +++ b/dissect/target/plugins/os/unix/log/messages.py @@ -11,6 +11,7 @@ from dissect.target.helpers.record import TargetRecordDescriptor from dissect.target.helpers.utils import year_rollover_helper from dissect.target.plugin import Plugin, alias, export +from dissect.target.plugins.os.unix.log.helpers import RE_LINE, is_iso_fmt, iso_readlines MessagesRecord = TargetRecordDescriptor( "linux/log/messages", @@ -54,7 +55,7 @@ def check_compatible(self) -> None: def messages(self) -> Iterator[MessagesRecord]: """Return contents of /var/log/messages*, /var/log/syslog* and cloud-init logs. - Due to year rollover detection, the contents of the files are returned in reverse. + Due to year rollover detection, the log contents could be returned in reversed or mixed chronological order. The messages log file holds information about a variety of events such as the system error messages, system startups and shutdowns, change in the network configuration, etc. 
Aims to store valuable, non-debug and @@ -73,16 +74,26 @@ def messages(self) -> Iterator[MessagesRecord]: yield from self._parse_cloud_init_log(log_file, tzinfo) continue - for ts, line in year_rollover_helper(log_file, RE_TS, DEFAULT_TS_LOG_FORMAT, tzinfo): - daemon = dict(enumerate(RE_DAEMON.findall(line))).get(0) - pid = dict(enumerate(RE_PID.findall(line))).get(0) - message = dict(enumerate(RE_MSG.findall(line))).get(0, line) + if is_iso_fmt(log_file): + iterable = iso_readlines(log_file) + else: + iterable = year_rollover_helper(log_file, RE_TS, DEFAULT_TS_LOG_FORMAT, tzinfo) + + for ts, line in iterable: + match = RE_LINE.match(line) + + if not match: + self.target.log.warning("Unable to parse message line in %s", log_file) + self.target.log.debug("Line %s", line) + continue + + values = match.groupdict() yield MessagesRecord( ts=ts, - daemon=daemon, - pid=pid, - message=message, + daemon=values["service"], + pid=values["pid"], + message=values["message"], source=log_file, _target=self.target, ) diff --git a/tests/plugins/os/unix/log/test_messages.py b/tests/plugins/os/unix/log/test_messages.py index 08c297839..d5f35d606 100644 --- a/tests/plugins/os/unix/log/test_messages.py +++ b/tests/plugins/os/unix/log/test_messages.py @@ -10,6 +10,7 @@ from dissect.target.filesystem import VirtualFilesystem from dissect.target.filesystems.tar import TarFilesystem from dissect.target.plugins.general import default +from dissect.target.plugins.os.unix._os import UnixPlugin from dissect.target.plugins.os.unix.log.messages import MessagesPlugin, MessagesRecord from tests._utils import absolute_path @@ -138,3 +139,29 @@ def test_unix_messages_cloud_init(target_unix: Target, fs_unix: VirtualFilesyste == "Cloud-init v. 1.2.3-4ubuntu5 running 'init-local' at Tue, 9 Aug 2005 11:55:21 +0000. Up 13.37 seconds." 
# noqa: E501 ) assert results[-1].source == "/var/log/installer/cloud-init.log.1.gz" + + +def test_unix_messages_ts_iso_8601_format(target_unix: Target, fs_unix: VirtualFilesystem) -> None: + """test if we correctly detect and parse ISO 8601 formatted syslog logs.""" + + fs_unix.map_file_fh("/etc/hostname", BytesIO(b"hostname")) + messages = """ + 2024-12-31T13:37:00.123456+02:00 hostname systemd[1]: Started anacron.service - Run anacron jobs. + 2024-12-31T13:37:00.123456+02:00 hostname anacron[1337]: Anacron 2.3 started on 2024-12-31 + 2024-12-31T13:37:00.123456+02:00 hostname anacron[1337]: Normal exit (0 jobs run) + 2024-12-31T13:37:00.123456+02:00 hostname systemd[1]: anacron.service: Deactivated successfully. + """ + fs_unix.map_file_fh("/var/log/syslog.1", BytesIO(gzip.compress(textwrap.dedent(messages).encode()))) + + target_unix.add_plugin(UnixPlugin) + target_unix.add_plugin(MessagesPlugin) + results = sorted(list(target_unix.syslog()), key=lambda r: r.ts) + + assert len(results) == 4 + + assert results[0].hostname == "hostname" + assert results[0].daemon == "systemd" + assert results[0].pid == 1 + assert results[0].ts == datetime(2024, 12, 31, 11, 37, 0, 123456, tzinfo=timezone.utc) + assert results[0].message == "Started anacron.service - Run anacron jobs." 
+ assert results[0].source == "/var/log/syslog.1" From 65548b9d4582081b8cc16d3ea55fd608087fb355 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 11 Nov 2024 11:22:28 +0100 Subject: [PATCH 2/4] implement review feedback --- dissect/target/plugins/os/unix/log/auth.py | 27 +------------------ dissect/target/plugins/os/unix/log/helpers.py | 24 +++++++++-------- .../target/plugins/os/unix/log/messages.py | 13 ++++----- 3 files changed, 21 insertions(+), 43 deletions(-) diff --git a/dissect/target/plugins/os/unix/log/auth.py b/dissect/target/plugins/os/unix/log/auth.py index 4be43260e..4cd234a3b 100644 --- a/dissect/target/plugins/os/unix/log/auth.py +++ b/dissect/target/plugins/os/unix/log/auth.py @@ -1,6 +1,5 @@ from __future__ import annotations -import itertools import logging import re from abc import ABC, abstractmethod @@ -12,10 +11,10 @@ from dissect.target import Target from dissect.target.exceptions import UnsupportedPluginError -from dissect.target.helpers.fsutil import open_decompress from dissect.target.helpers.record import DynamicDescriptor, TargetRecordDescriptor from dissect.target.helpers.utils import year_rollover_helper from dissect.target.plugin import Plugin, alias, export +from dissect.target.plugins.os.unix.log.helpers import is_iso_fmt, iso_readlines log = logging.getLogger(__name__) @@ -347,27 +346,3 @@ def authlog(self) -> Iterator[Any]: for ts, line in iterable: yield self._auth_log_builder.build_record(ts, auth_file, line) - - -def iso_readlines(file: Path) -> Iterator[tuple[datetime, str]]: - """Iterator reading the provided auth log file in ISO format. 
Mimics ``year_rollover_helper`` behaviour.""" - with open_decompress(file, "rt") as fh: - for line in fh: - if not (match := RE_TS_ISO.match(line)): - log.warning("No timestamp found in one of the lines in %s!", file) - log.debug("Skipping line: %s", line) - continue - - try: - ts = datetime.strptime(match[0], "%Y-%m-%dT%H:%M:%S.%f%z") - except ValueError as e: - log.warning("Unable to parse ISO timestamp in line: %s", line) - log.debug("", exc_info=e) - continue - - yield ts, line - - -def is_iso_fmt(file: Path) -> bool: - """Determine if the provided auth log file uses new ISO format logging or not.""" - return any(itertools.islice(iso_readlines(file), 0, 2)) diff --git a/dissect/target/plugins/os/unix/log/helpers.py b/dissect/target/plugins/os/unix/log/helpers.py index 6b9e40255..a6a652715 100644 --- a/dissect/target/plugins/os/unix/log/helpers.py +++ b/dissect/target/plugins/os/unix/log/helpers.py @@ -2,25 +2,28 @@ import logging import re from datetime import datetime +from pathlib import Path from typing import Iterator -from dissect.target.helpers.fsutil import TargetPath, open_decompress +from dissect.target.helpers.fsutil import open_decompress log = logging.getLogger(__name__) -_RE_TS = r"^[A-Za-z]{3}\s*[0-9]{1,2}\s[0-9]{1,2}:[0-9]{2}:[0-9]{2}" -_RE_TS_ISO = r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+\d{2}:\d{2}" - -RE_TS = re.compile(_RE_TS) -RE_TS_ISO = re.compile(_RE_TS_ISO) +RE_TS = re.compile(r"^[A-Za-z]{3}\s*\d{1,2}\s\d{1,2}:\d{2}:\d{2}") +RE_TS_ISO = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+\d{2}:\d{2}") RE_LINE = re.compile( - rf"(?P<ts>{_RE_TS}|{_RE_TS_ISO})\s(?P<hostname>\S+)\s(?P<service>\S+?)(\[(?P<pid>\d+)\])?:\s(?P<message>.+)$" + r""" + \d{2}:\d{2}\s # First match on the similar ending of the different timestamps + (?:\S+)\s # The hostname, but do not capture it + (?P<daemon>\S+?)(\[(?P<pid>\d+)\])?: # The daemon with optionally the PID between brackets + \s*(?P<message>.+?)\s*$ # The log message stripped from spaces left and right + """, + re.VERBOSE, ) -def 
iso_readlines(file: TargetPath) -> Iterator[tuple[datetime, str]]: +def iso_readlines(file: Path) -> Iterator[tuple[datetime, str]]: """Iterator reading the provided log file in ISO format. Mimics ``year_rollover_helper`` behaviour.""" - with open_decompress(file, "rt") as fh: for line in fh: if not (match := RE_TS_ISO.match(line)): @@ -30,7 +33,6 @@ def iso_readlines(file: TargetPath) -> Iterator[tuple[datetime, str]]: try: ts = datetime.strptime(match[0], "%Y-%m-%dT%H:%M:%S.%f%z") - except ValueError as e: log.warning("Unable to parse ISO timestamp in line: %s", line) log.debug("", exc_info=e) @@ -39,6 +41,6 @@ def iso_readlines(file: TargetPath) -> Iterator[tuple[datetime, str]]: yield ts, line -def is_iso_fmt(file: TargetPath) -> bool: +def is_iso_fmt(file: Path) -> bool: """Determine if the provided log file uses ISO 8601 timestamp format logging or not.""" return any(itertools.islice(iso_readlines(file), 0, 2)) diff --git a/dissect/target/plugins/os/unix/log/messages.py b/dissect/target/plugins/os/unix/log/messages.py index 2c414d4ea..3b2f18bd8 100644 --- a/dissect/target/plugins/os/unix/log/messages.py +++ b/dissect/target/plugins/os/unix/log/messages.py @@ -11,7 +11,11 @@ from dissect.target.helpers.record import TargetRecordDescriptor from dissect.target.helpers.utils import year_rollover_helper from dissect.target.plugin import Plugin, alias, export -from dissect.target.plugins.os.unix.log.helpers import RE_LINE, is_iso_fmt, iso_readlines +from dissect.target.plugins.os.unix.log.helpers import ( + RE_LINE, + is_iso_fmt, + iso_readlines, +) MessagesRecord = TargetRecordDescriptor( "linux/log/messages", @@ -83,19 +87,16 @@ def messages(self) -> Iterator[MessagesRecord]: iterable = year_rollover_helper(log_file, RE_TS, DEFAULT_TS_LOG_FORMAT, tzinfo) for ts, line in iterable: - match = RE_LINE.match(line) + match = RE_LINE.search(line) if not match: self.target.log.warning("Unable to parse message line in %s", log_file) self.target.log.debug("Line %s", line) 
continue - values = match.groupdict() yield MessagesRecord( ts=ts, - daemon=values["service"], - pid=values["pid"], - message=values["message"], + **match.groupdict(), source=log_file, _target=self.target, ) From fc1471b28a2b01db3d01fee1bfa15765a999ece5 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Thu, 21 Nov 2024 14:53:05 +0100 Subject: [PATCH 3/4] implement review comments --- dissect/target/plugins/os/unix/log/auth.py | 18 ++++++------------ dissect/target/plugins/os/unix/log/helpers.py | 2 +- dissect/target/plugins/os/unix/log/messages.py | 11 ++++------- tests/plugins/os/unix/log/test_messages.py | 6 +++--- 4 files changed, 14 insertions(+), 23 deletions(-) diff --git a/dissect/target/plugins/os/unix/log/auth.py b/dissect/target/plugins/os/unix/log/auth.py index 4cd234a3b..f7e37a804 100644 --- a/dissect/target/plugins/os/unix/log/auth.py +++ b/dissect/target/plugins/os/unix/log/auth.py @@ -14,21 +14,15 @@ from dissect.target.helpers.record import DynamicDescriptor, TargetRecordDescriptor from dissect.target.helpers.utils import year_rollover_helper from dissect.target.plugin import Plugin, alias, export -from dissect.target.plugins.os.unix.log.helpers import is_iso_fmt, iso_readlines +from dissect.target.plugins.os.unix.log.helpers import ( + RE_LINE, + RE_TS, + is_iso_fmt, + iso_readlines, +) log = logging.getLogger(__name__) -RE_TS = re.compile(r"^[A-Za-z]{3}\s*\d{1,2}\s\d{1,2}:\d{2}:\d{2}") -RE_TS_ISO = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+\d{2}:\d{2}") -RE_LINE = re.compile( - r""" - \d{2}:\d{2}\s # First match on the similar ending of the different timestamps - (?P<hostname>\S+)\s # The hostname - (?P<service>\S+?)(\[(?P<pid>\d+)\])?: # The service with optionally the PID between brackets - \s*(?P<message>.+?)\s*$ # The log message stripped from spaces left and right - """, - re.VERBOSE, -) # Generic regular expressions RE_IPV4_ADDRESS = re.compile( diff --git a/dissect/target/plugins/os/unix/log/helpers.py 
b/dissect/target/plugins/os/unix/log/helpers.py index a6a652715..f37c4d8c3 100644 --- a/dissect/target/plugins/os/unix/log/helpers.py +++ b/dissect/target/plugins/os/unix/log/helpers.py @@ -15,7 +15,7 @@ r""" \d{2}:\d{2}\s # First match on the similar ending of the different timestamps (?:\S+)\s # The hostname, but do not capture it - (?P<daemon>\S+?)(\[(?P<pid>\d+)\])?: # The daemon with optionally the PID between brackets + (?P<service>\S+?)(\[(?P<pid>\d+)\])?: # The service / daemon with optionally the PID between brackets \s*(?P<message>.+?)\s*$ # The log message stripped from spaces left and right """, re.VERBOSE, diff --git a/dissect/target/plugins/os/unix/log/messages.py b/dissect/target/plugins/os/unix/log/messages.py index 3b2f18bd8..43bfed43e 100644 --- a/dissect/target/plugins/os/unix/log/messages.py +++ b/dissect/target/plugins/os/unix/log/messages.py @@ -13,6 +13,7 @@ from dissect.target.plugin import Plugin, alias, export from dissect.target.plugins.os.unix.log.helpers import ( RE_LINE, + RE_TS, is_iso_fmt, iso_readlines, ) @@ -21,7 +22,7 @@ "linux/log/messages", [ ("datetime", "ts"), - ("string", "daemon"), + ("string", "service"), ("varint", "pid"), ("string", "message"), ("path", "source"), @@ -29,12 +30,8 @@ ) DEFAULT_TS_LOG_FORMAT = "%b %d %H:%M:%S" -RE_TS = re.compile(r"(\w+\s{1,2}\d+\s\d{2}:\d{2}:\d{2})") -RE_DAEMON = re.compile(r"^[^:]+:\d+:\d+[^\[\]:]+\s([^\[:]+)[\[|:]{1}") -RE_PID = re.compile(r"\w\[(\d+)\]") -RE_MSG = re.compile(r"[^:]+:\d+:\d+[^:]+:\s(.*)$") RE_CLOUD_INIT_LINE = re.compile( - r"^(?P<ts>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}) - (?P<daemon>.*)\[(?P<log_level>\w+)\]\: (?P<message>.*)$" + r"^(?P<ts>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}) - (?P<service>.*)\[(?P<log_level>\w+)\]\: (?P<message>.*)$" ) @@ -146,7 +143,7 @@ def _parse_cloud_init_log(self, log_file: Path, tzinfo: tzinfo | None = timezone yield MessagesRecord( ts=ts, - daemon=values["daemon"], + service=values["service"], pid=None, message=values["message"], source=log_file, diff --git a/tests/plugins/os/unix/log/test_messages.py 
b/tests/plugins/os/unix/log/test_messages.py index d5f35d606..33a259cfb 100644 --- a/tests/plugins/os/unix/log/test_messages.py +++ b/tests/plugins/os/unix/log/test_messages.py @@ -126,13 +126,13 @@ def test_unix_messages_cloud_init(target_unix: Target, fs_unix: VirtualFilesyste assert len(results) == 4 assert results[0].ts == datetime(2005, 8, 9, 11, 55, 21, 0, tzinfo=ZoneInfo("Europe/Amsterdam")) - assert results[0].daemon == "foo.py" + assert results[0].service == "foo.py" assert results[0].pid is None assert results[0].message == "This is a cloud-init message!" assert results[0].source == "/var/log/installer/cloud-init.log" assert results[-1].ts == datetime(2005, 8, 9, 11, 55, 21, 1_000, tzinfo=ZoneInfo("Europe/Amsterdam")) - assert results[-1].daemon == "util.py" + assert results[-1].service == "util.py" assert results[-1].pid is None assert ( results[-1].message @@ -160,7 +160,7 @@ def test_unix_messages_ts_iso_8601_format(target_unix: Target, fs_unix: VirtualF assert len(results) == 4 assert results[0].hostname == "hostname" - assert results[0].daemon == "systemd" + assert results[0].service == "systemd" assert results[0].pid == 1 assert results[0].ts == datetime(2024, 12, 31, 11, 37, 0, 123456, tzinfo=timezone.utc) assert results[0].message == "Started anacron.service - Run anacron jobs." 
From 51f4a0b997a74a5a30479bb731c92773d92ce632 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Thu, 21 Nov 2024 14:59:47 +0100 Subject: [PATCH 4/4] fix dhcp ips --- dissect/target/plugins/os/unix/linux/network_managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dissect/target/plugins/os/unix/linux/network_managers.py b/dissect/target/plugins/os/unix/linux/network_managers.py index d3471769b..4fb4def76 100644 --- a/dissect/target/plugins/os/unix/linux/network_managers.py +++ b/dissect/target/plugins/os/unix/linux/network_managers.py @@ -567,7 +567,7 @@ def records_enumerate(iterable: Iterable) -> Iterator[tuple[int, JournalRecord | continue # Debian and CentOS dhclient - if hasattr(record, "daemon") and record.daemon == "dhclient" and "bound to" in line: + if hasattr(record, "service") and record.service == "dhclient" and "bound to" in line: ip = line.split("bound to")[1].split(" ")[1].strip() ips.add(ip) continue