diff --git a/dissect/target/plugins/os/unix/linux/network_managers.py b/dissect/target/plugins/os/unix/linux/network_managers.py index d3471769b..4fb4def76 100644 --- a/dissect/target/plugins/os/unix/linux/network_managers.py +++ b/dissect/target/plugins/os/unix/linux/network_managers.py @@ -567,7 +567,7 @@ def records_enumerate(iterable: Iterable) -> Iterator[tuple[int, JournalRecord | continue # Debian and CentOS dhclient - if hasattr(record, "daemon") and record.daemon == "dhclient" and "bound to" in line: + if hasattr(record, "service") and record.service == "dhclient" and "bound to" in line: ip = line.split("bound to")[1].split(" ")[1].strip() ips.add(ip) continue diff --git a/dissect/target/plugins/os/unix/log/auth.py b/dissect/target/plugins/os/unix/log/auth.py index 4be43260e..f7e37a804 100644 --- a/dissect/target/plugins/os/unix/log/auth.py +++ b/dissect/target/plugins/os/unix/log/auth.py @@ -1,6 +1,5 @@ from __future__ import annotations -import itertools import logging import re from abc import ABC, abstractmethod @@ -12,24 +11,18 @@ from dissect.target import Target from dissect.target.exceptions import UnsupportedPluginError -from dissect.target.helpers.fsutil import open_decompress from dissect.target.helpers.record import DynamicDescriptor, TargetRecordDescriptor from dissect.target.helpers.utils import year_rollover_helper from dissect.target.plugin import Plugin, alias, export +from dissect.target.plugins.os.unix.log.helpers import ( + RE_LINE, + RE_TS, + is_iso_fmt, + iso_readlines, +) log = logging.getLogger(__name__) -RE_TS = re.compile(r"^[A-Za-z]{3}\s*\d{1,2}\s\d{1,2}:\d{2}:\d{2}") -RE_TS_ISO = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+\d{2}:\d{2}") -RE_LINE = re.compile( - r""" - \d{2}:\d{2}\s # First match on the similar ending of the different timestamps - (?P\S+)\s # The hostname - (?P\S+?)(\[(?P\d+)\])?: # The service with optionally the PID between brackets - \s*(?P.+?)\s*$ # The log message stripped from spaces left and right - """, - re.VERBOSE, -) # Generic regular expressions RE_IPV4_ADDRESS = re.compile( @@ -347,27 +340,3 @@ def authlog(self) -> Iterator[Any]: for ts, line in iterable: yield self._auth_log_builder.build_record(ts, auth_file, line) - - -def iso_readlines(file: Path) -> Iterator[tuple[datetime, str]]: - """Iterator reading the provided auth log file in ISO format. Mimics ``year_rollover_helper`` behaviour.""" - with open_decompress(file, "rt") as fh: - for line in fh: - if not (match := RE_TS_ISO.match(line)): - log.warning("No timestamp found in one of the lines in %s!", file) - log.debug("Skipping line: %s", line) - continue - - try: - ts = datetime.strptime(match[0], "%Y-%m-%dT%H:%M:%S.%f%z") - except ValueError as e: - log.warning("Unable to parse ISO timestamp in line: %s", line) - log.debug("", exc_info=e) - continue - - yield ts, line - - -def is_iso_fmt(file: Path) -> bool: - """Determine if the provided auth log file uses new ISO format logging or not.""" - return any(itertools.islice(iso_readlines(file), 0, 2)) diff --git a/dissect/target/plugins/os/unix/log/helpers.py b/dissect/target/plugins/os/unix/log/helpers.py new file mode 100644 index 000000000..f37c4d8c3 --- /dev/null +++ b/dissect/target/plugins/os/unix/log/helpers.py @@ -0,0 +1,46 @@ +import itertools +import logging +import re +from datetime import datetime +from pathlib import Path +from typing import Iterator + +from dissect.target.helpers.fsutil import open_decompress + +log = logging.getLogger(__name__) + +RE_TS = re.compile(r"^[A-Za-z]{3}\s*\d{1,2}\s\d{1,2}:\d{2}:\d{2}") +RE_TS_ISO = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+\d{2}:\d{2}") +RE_LINE = re.compile( + r""" + \d{2}:\d{2}\s # First match on the similar ending of the different timestamps + (?:\S+)\s # The hostname, but do not capture it + (?P\S+?)(\[(?P\d+)\])?: # The service / daemon with optionally the PID between brackets + \s*(?P.+?)\s*$ # The log message stripped from spaces left and right + """, + re.VERBOSE, +) + + +def iso_readlines(file: Path) -> Iterator[tuple[datetime, str]]: + """Iterator reading the provided log file in ISO format. Mimics ``year_rollover_helper`` behaviour.""" + with open_decompress(file, "rt") as fh: + for line in fh: + if not (match := RE_TS_ISO.match(line)): + log.warning("No timestamp found in one of the lines in %s!", file) + log.debug("Skipping line: %s", line) + continue + + try: + ts = datetime.strptime(match[0], "%Y-%m-%dT%H:%M:%S.%f%z") + except ValueError as e: + log.warning("Unable to parse ISO timestamp in line: %s", line) + log.debug("", exc_info=e) + continue + + yield ts, line + + +def is_iso_fmt(file: Path) -> bool: + """Determine if the provided log file uses ISO 8601 timestamp format logging or not.""" + return any(itertools.islice(iso_readlines(file), 0, 2)) diff --git a/dissect/target/plugins/os/unix/log/messages.py b/dissect/target/plugins/os/unix/log/messages.py index 37f80f504..43bfed43e 100644 --- a/dissect/target/plugins/os/unix/log/messages.py +++ b/dissect/target/plugins/os/unix/log/messages.py @@ -11,12 +11,18 @@ from dissect.target.helpers.record import TargetRecordDescriptor from dissect.target.helpers.utils import year_rollover_helper from dissect.target.plugin import Plugin, alias, export +from dissect.target.plugins.os.unix.log.helpers import ( + RE_LINE, + RE_TS, + is_iso_fmt, + iso_readlines, +) MessagesRecord = TargetRecordDescriptor( "linux/log/messages", [ ("datetime", "ts"), - ("string", "daemon"), + ("string", "service"), ("varint", "pid"), ("string", "message"), ("path", "source"), @@ -24,12 +30,8 @@ ) DEFAULT_TS_LOG_FORMAT = "%b %d %H:%M:%S" -RE_TS = re.compile(r"(\w+\s{1,2}\d+\s\d{2}:\d{2}:\d{2})") -RE_DAEMON = re.compile(r"^[^:]+:\d+:\d+[^\[\]:]+\s([^\[:]+)[\[|:]{1}") -RE_PID = re.compile(r"\w\[(\d+)\]") -RE_MSG = re.compile(r"[^:]+:\d+:\d+[^:]+:\s(.*)$") RE_CLOUD_INIT_LINE = re.compile( - r"^(?P\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}) - (?P.*)\[(?P\w+)\]\: (?P.*)$" + r"^(?P\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}) - (?P.*)\[(?P\w+)\]\: (?P.*)$" ) @@ -56,7 +58,7 @@ def check_compatible(self) -> None: def messages(self) -> Iterator[MessagesRecord]: """Return contents of /var/log/messages*, /var/log/syslog* and cloud-init logs. - Due to year rollover detection, the contents of the files are returned in reverse. + Due to year rollover detection, the log contents could be returned in reversed or mixed chronological order. The messages log file holds information about a variety of events such as the system error messages, system startups and shutdowns, change in the network configuration, etc. Aims to store valuable, non-debug and @@ -75,16 +77,23 @@ def messages(self) -> Iterator[MessagesRecord]: yield from self._parse_cloud_init_log(log_file, tzinfo) continue - for ts, line in year_rollover_helper(log_file, RE_TS, DEFAULT_TS_LOG_FORMAT, tzinfo): - daemon = dict(enumerate(RE_DAEMON.findall(line))).get(0) - pid = dict(enumerate(RE_PID.findall(line))).get(0) - message = dict(enumerate(RE_MSG.findall(line))).get(0, line) + if is_iso_fmt(log_file): + iterable = iso_readlines(log_file) + + else: + iterable = year_rollover_helper(log_file, RE_TS, DEFAULT_TS_LOG_FORMAT, tzinfo) + + for ts, line in iterable: + match = RE_LINE.search(line) + + if not match: + self.target.log.warning("Unable to parse message line in %s", log_file) + self.target.log.debug("Line %s", line) + continue yield MessagesRecord( ts=ts, - daemon=daemon, - pid=pid, - message=message, + **match.groupdict(), source=log_file, _target=self.target, ) @@ -134,7 +143,7 @@ def _parse_cloud_init_log(self, log_file: Path, tzinfo: tzinfo | None = timezone yield MessagesRecord( ts=ts, - daemon=values["daemon"], + service=values["service"], pid=None, message=values["message"], source=log_file, diff --git a/tests/plugins/os/unix/log/test_messages.py b/tests/plugins/os/unix/log/test_messages.py index 08c297839..33a259cfb 100644 --- a/tests/plugins/os/unix/log/test_messages.py +++ b/tests/plugins/os/unix/log/test_messages.py @@ -10,6 +10,7 @@ from dissect.target.filesystem import VirtualFilesystem from dissect.target.filesystems.tar import TarFilesystem from dissect.target.plugins.general import default +from dissect.target.plugins.os.unix._os import UnixPlugin from dissect.target.plugins.os.unix.log.messages import MessagesPlugin, MessagesRecord from tests._utils import absolute_path @@ -125,16 +126,42 @@ def test_unix_messages_cloud_init(target_unix: Target, fs_unix: VirtualFilesyste assert len(results) == 4 assert results[0].ts == datetime(2005, 8, 9, 11, 55, 21, 0, tzinfo=ZoneInfo("Europe/Amsterdam")) - assert results[0].daemon == "foo.py" + assert results[0].service == "foo.py" assert results[0].pid is None assert results[0].message == "This is a cloud-init message!" assert results[0].source == "/var/log/installer/cloud-init.log" assert results[-1].ts == datetime(2005, 8, 9, 11, 55, 21, 1_000, tzinfo=ZoneInfo("Europe/Amsterdam")) - assert results[-1].daemon == "util.py" + assert results[-1].service == "util.py" assert results[-1].pid is None assert ( results[-1].message == "Cloud-init v. 1.2.3-4ubuntu5 running 'init-local' at Tue, 9 Aug 2005 11:55:21 +0000. Up 13.37 seconds." # noqa: E501 ) assert results[-1].source == "/var/log/installer/cloud-init.log.1.gz" + + +def test_unix_messages_ts_iso_8601_format(target_unix: Target, fs_unix: VirtualFilesystem) -> None: + """test if we correctly detect and parse ISO 8601 formatted syslog logs.""" + + fs_unix.map_file_fh("/etc/hostname", BytesIO(b"hostname")) + messages = """ + 2024-12-31T13:37:00.123456+02:00 hostname systemd[1]: Started anacron.service - Run anacron jobs. + 2024-12-31T13:37:00.123456+02:00 hostname anacron[1337]: Anacron 2.3 started on 2024-12-31 + 2024-12-31T13:37:00.123456+02:00 hostname anacron[1337]: Normal exit (0 jobs run) + 2024-12-31T13:37:00.123456+02:00 hostname systemd[1]: anacron.service: Deactivated successfully. + """ + fs_unix.map_file_fh("/var/log/syslog.1", BytesIO(gzip.compress(textwrap.dedent(messages).encode()))) + + target_unix.add_plugin(UnixPlugin) + target_unix.add_plugin(MessagesPlugin) + results = sorted(list(target_unix.syslog()), key=lambda r: r.ts) + + assert len(results) == 4 + + assert results[0].hostname == "hostname" + assert results[0].service == "systemd" + assert results[0].pid == 1 + assert results[0].ts == datetime(2024, 12, 31, 11, 37, 0, 123456, tzinfo=timezone.utc) + assert results[0].message == "Started anacron.service - Run anacron jobs." + assert results[0].source == "/var/log/syslog.1"