Skip to content

Commit

Permalink
Add "log4j" input format
Browse files Browse the repository at this point in the history
Only works for the most common "bracketed style" format:

2024-01-19 15:04:23,456 [main] INFO [com.example.Class] - Message
  • Loading branch information
dloss committed Jan 19, 2025
1 parent 0a46e41 commit 66aa3e4
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 0 deletions.
30 changes: 30 additions & 0 deletions klp.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,10 @@
"timestamp_key": "off",
"levels": {
"trace": "cyan",
"finest": "cyan",
"debug": "bright_cyan",
"finer": "bright_cyan",
"config": "bright_cyan",
"info": "bright_green",
"notice": "bright_green",
"warn": "bright_yellow",
Expand All @@ -141,6 +144,7 @@
"critical": "bright_red",
"emerg": "bright_red",
"emergency": "bright_red",
"severe": "bright_red",
},
"context_prefix": {
"before": "blue",
Expand Down Expand Up @@ -342,6 +346,7 @@ def build_globals_dict(modules: List[Any]) -> Dict[str, Any]:
parse_jsonl,
parse_clf,
parse_combined,
parse_log4j,
parse_unix,
parse_line,
parse_data,
Expand Down Expand Up @@ -771,6 +776,7 @@ def identity(x):
"jsonl": parse_jsonl,
"clf": parse_clf,
"combined": parse_combined,
"log4j": parse_log4j,
"unix": parse_unix,
"line": parse_line,
"ts1m": parse_ts1m,
Expand Down Expand Up @@ -1150,6 +1156,29 @@ def parse_combined(line):
return {}


# Compiled once at import time; parse_log4j() is called for every input line.
_LOG4J_LINE_RE = re.compile(
    r"""
    (?P<timestamp>[^\[]+?)\s+       # Timestamp: lazily, everything before the first bracket
    \[(?P<thread>[^\]]+)\]\s+       # Thread name in brackets
    (?P<level>\w+)\s+               # Log level (\w already includes the underscore)
    \[(?P<logger>[^\]]+)\]\s+-      # Logger name in brackets, then the dash separator
    (?:\s+(?P<message>.+))?         # Message; absent when the line ends right after the dash
    """,
    re.VERBOSE,
)


def parse_log4j(line: str) -> Dict[str, str]:
    """Parse a "bracketed style" Log4j line into a dictionary of string values.

    Examples of supported lines:
        2024-01-19 15:04:23,456 [main] SEVERE [com.example.CustomerService] - Failed to connect
        19-Jan-2024 15:04:23.456 [main] ERROR [com.example] - Message

    Args:
        line: A single log line. Leading and trailing whitespace is ignored.

    Returns:
        A dict with the keys "timestamp", "thread", "level", "logger" and
        "message" (all values stripped of surrounding whitespace), or an
        empty dict if the line does not follow the expected layout.
        A line that ends right after the " -" separator is accepted and
        yields an empty "message".
    """
    # Strip first so that a line with an empty message ("... [logger] - ")
    # still ends at the dash and can match with the message group absent.
    match = _LOG4J_LINE_RE.fullmatch(line.strip())
    if not match:
        return {}

    # The optional message group is None when no message text is present.
    return {key: (value or "").strip() for key, value in match.groupdict().items()}


def parse_unix(line: str) -> Dict[str, str]:
"""
Parse a line of log data in typical Unix server format into structured components.
Expand Down Expand Up @@ -3730,6 +3759,7 @@ def parse_args():
"table",
"clf",
"combined",
"log4j",
"unix",
"line",
"data",
Expand Down
47 changes: 47 additions & 0 deletions tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
parse_jsonl,
parse_clf,
parse_combined,
parse_log4j,
parse_unix,
)

Expand Down Expand Up @@ -77,6 +78,52 @@ def test_parse_combined():
assert parse_combined("Invalid line") == {}


def test_parse_log4j():
    """Check parse_log4j against representative bracketed-style Log4j lines."""
    # Each case pairs an input line with the fields it must yield.
    cases = [
        # Standard line
        (
            "2024-01-19 15:04:23,456 [main] INFO [com.example.Service] - Started successfully",
            {
                "timestamp": "2024-01-19 15:04:23,456",
                "thread": "main",
                "level": "INFO",
                "logger": "com.example.Service",
                "message": "Started successfully",
            },
        ),
        # Different timestamp format
        (
            "19-Jan-2024 15:04:23.456 [main] ERROR [com.example] - Failed to start",
            {"timestamp": "19-Jan-2024 15:04:23.456", "level": "ERROR"},
        ),
        # Complex thread name
        (
            "2024-01-19 15:04:23,456 [http-nio-8080-exec-1] DEBUG [com.example] - Processing request",
            {"thread": "http-nio-8080-exec-1", "level": "DEBUG"},
        ),
        # Complex logger name with version
        (
            "2024-01-19 15:04:23,456 [main] INFO [com.example.v2.api.Service] - API ready",
            {"logger": "com.example.v2.api.Service"},
        ),
        # Message with special characters
        (
            '2024-01-19 15:04:23,456 [main] WARN [com.example] - User "john.doe" failed auth: error=Invalid credentials',
            {"message": 'User "john.doe" failed auth: error=Invalid credentials'},
        ),
        # With extra whitespace
        (
            " 2024-01-19 15:04:23,456 [main] INFO [com.example] - Extra spaces ",
            {"message": "Extra spaces"},
        ),
    ]

    for raw_line, expected_fields in cases:
        parsed = parse_log4j(raw_line)
        for field, expected_value in expected_fields.items():
            assert parsed[field] == expected_value

    # Invalid format
    assert parse_log4j("Invalid line") == {}


def test_parse_unix():
"""Test Unix syslog format parsing."""
# Standard format with PID
Expand Down

0 comments on commit 66aa3e4

Please sign in to comment.