Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for ISO 8601 timestamps in syslogs #907

Merged
merged 8 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 1 addition & 26 deletions dissect/target/plugins/os/unix/log/auth.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import itertools
import logging
import re
from abc import ABC, abstractmethod
Expand All @@ -12,10 +11,10 @@

from dissect.target import Target
from dissect.target.exceptions import UnsupportedPluginError
from dissect.target.helpers.fsutil import open_decompress
from dissect.target.helpers.record import DynamicDescriptor, TargetRecordDescriptor
from dissect.target.helpers.utils import year_rollover_helper
from dissect.target.plugin import Plugin, alias, export
from dissect.target.plugins.os.unix.log.helpers import is_iso_fmt, iso_readlines

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -347,27 +346,3 @@ def authlog(self) -> Iterator[Any]:

for ts, line in iterable:
yield self._auth_log_builder.build_record(ts, auth_file, line)


def iso_readlines(file: Path) -> Iterator[tuple[datetime, str]]:
"""Iterator reading the provided auth log file in ISO format. Mimics ``year_rollover_helper`` behaviour."""
with open_decompress(file, "rt") as fh:
for line in fh:
if not (match := RE_TS_ISO.match(line)):
log.warning("No timestamp found in one of the lines in %s!", file)
log.debug("Skipping line: %s", line)
continue

try:
ts = datetime.strptime(match[0], "%Y-%m-%dT%H:%M:%S.%f%z")
except ValueError as e:
log.warning("Unable to parse ISO timestamp in line: %s", line)
log.debug("", exc_info=e)
continue

yield ts, line


def is_iso_fmt(file: Path) -> bool:
"""Determine if the provided auth log file uses new ISO format logging or not."""
return any(itertools.islice(iso_readlines(file), 0, 2))
46 changes: 46 additions & 0 deletions dissect/target/plugins/os/unix/log/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import itertools
import logging
import re
from datetime import datetime
from pathlib import Path
from typing import Iterator

from dissect.target.helpers.fsutil import open_decompress

log = logging.getLogger(__name__)

RE_TS = re.compile(r"^[A-Za-z]{3}\s*\d{1,2}\s\d{1,2}:\d{2}:\d{2}")
RE_TS_ISO = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+\d{2}:\d{2}")
RE_LINE = re.compile(
r"""
\d{2}:\d{2}\s # First match on the similar ending of the different timestamps
(?:\S+)\s # The hostname, but do not capture it
(?P<daemon>\S+?)(\[(?P<pid>\d+)\])?: # The daemon with optionally the PID between brackets
\s*(?P<message>.+?)\s*$ # The log message stripped from spaces left and right
""",
re.VERBOSE,
)


def iso_readlines(file: Path) -> Iterator[tuple[datetime, str]]:
"""Iterator reading the provided log file in ISO format. Mimics ``year_rollover_helper`` behaviour."""
with open_decompress(file, "rt") as fh:
for line in fh:
if not (match := RE_TS_ISO.match(line)):
log.warning("No timestamp found in one of the lines in %s!", file)
log.debug("Skipping line: %s", line)
continue

try:
ts = datetime.strptime(match[0], "%Y-%m-%dT%H:%M:%S.%f%z")
except ValueError as e:
log.warning("Unable to parse ISO timestamp in line: %s", line)
log.debug("", exc_info=e)
continue

yield ts, line


def is_iso_fmt(file: Path) -> bool:
"""Determine if the provided log file uses ISO 8601 timestamp format logging or not."""
return any(itertools.islice(iso_readlines(file), 0, 2))
28 changes: 20 additions & 8 deletions dissect/target/plugins/os/unix/log/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
from dissect.target.helpers.record import TargetRecordDescriptor
from dissect.target.helpers.utils import year_rollover_helper
from dissect.target.plugin import Plugin, alias, export
from dissect.target.plugins.os.unix.log.helpers import (
RE_LINE,
is_iso_fmt,
iso_readlines,
)

MessagesRecord = TargetRecordDescriptor(
"linux/log/messages",
Expand Down Expand Up @@ -56,7 +61,7 @@ def check_compatible(self) -> None:
def messages(self) -> Iterator[MessagesRecord]:
"""Return contents of /var/log/messages*, /var/log/syslog* and cloud-init logs.

Due to year rollover detection, the contents of the files are returned in reverse.
Due to year rollover detection, the log contents could be returned in reversed or mixed chronological order.

The messages log file holds information about a variety of events such as the system error messages, system
startups and shutdowns, change in the network configuration, etc. Aims to store valuable, non-debug and
Expand All @@ -75,16 +80,23 @@ def messages(self) -> Iterator[MessagesRecord]:
yield from self._parse_cloud_init_log(log_file, tzinfo)
continue

for ts, line in year_rollover_helper(log_file, RE_TS, DEFAULT_TS_LOG_FORMAT, tzinfo):
daemon = dict(enumerate(RE_DAEMON.findall(line))).get(0)
pid = dict(enumerate(RE_PID.findall(line))).get(0)
message = dict(enumerate(RE_MSG.findall(line))).get(0, line)
JSCU-CNI marked this conversation as resolved.
Show resolved Hide resolved
if is_iso_fmt(log_file):
iterable = iso_readlines(log_file)

JSCU-CNI marked this conversation as resolved.
Show resolved Hide resolved
else:
iterable = year_rollover_helper(log_file, RE_TS, DEFAULT_TS_LOG_FORMAT, tzinfo)
JSCU-CNI marked this conversation as resolved.
Show resolved Hide resolved

for ts, line in iterable:
match = RE_LINE.search(line)

if not match:
self.target.log.warning("Unable to parse message line in %s", log_file)
self.target.log.debug("Line %s", line)
continue

yield MessagesRecord(
ts=ts,
daemon=daemon,
pid=pid,
message=message,
**match.groupdict(),
source=log_file,
_target=self.target,
)
Expand Down
27 changes: 27 additions & 0 deletions tests/plugins/os/unix/log/test_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from dissect.target.filesystem import VirtualFilesystem
from dissect.target.filesystems.tar import TarFilesystem
from dissect.target.plugins.general import default
from dissect.target.plugins.os.unix._os import UnixPlugin
from dissect.target.plugins.os.unix.log.messages import MessagesPlugin, MessagesRecord
from tests._utils import absolute_path

Expand Down Expand Up @@ -138,3 +139,29 @@ def test_unix_messages_cloud_init(target_unix: Target, fs_unix: VirtualFilesyste
== "Cloud-init v. 1.2.3-4ubuntu5 running 'init-local' at Tue, 9 Aug 2005 11:55:21 +0000. Up 13.37 seconds." # noqa: E501
)
assert results[-1].source == "/var/log/installer/cloud-init.log.1.gz"


def test_unix_messages_ts_iso_8601_format(target_unix: Target, fs_unix: VirtualFilesystem) -> None:
"""test if we correctly detect and parse ISO 8601 formatted syslog logs."""

fs_unix.map_file_fh("/etc/hostname", BytesIO(b"hostname"))
messages = """
2024-12-31T13:37:00.123456+02:00 hostname systemd[1]: Started anacron.service - Run anacron jobs.
2024-12-31T13:37:00.123456+02:00 hostname anacron[1337]: Anacron 2.3 started on 2024-12-31
2024-12-31T13:37:00.123456+02:00 hostname anacron[1337]: Normal exit (0 jobs run)
2024-12-31T13:37:00.123456+02:00 hostname systemd[1]: anacron.service: Deactivated successfully.
"""
fs_unix.map_file_fh("/var/log/syslog.1", BytesIO(gzip.compress(textwrap.dedent(messages).encode())))

target_unix.add_plugin(UnixPlugin)
target_unix.add_plugin(MessagesPlugin)
results = sorted(list(target_unix.syslog()), key=lambda r: r.ts)

assert len(results) == 4

assert results[0].hostname == "hostname"
assert results[0].daemon == "systemd"
assert results[0].pid == 1
assert results[0].ts == datetime(2024, 12, 31, 11, 37, 0, 123456, tzinfo=timezone.utc)
assert results[0].message == "Started anacron.service - Run anacron jobs."
assert results[0].source == "/var/log/syslog.1"