-
Notifications
You must be signed in to change notification settings - Fork 13
/
logseek
executable file
·304 lines (237 loc) · 8.14 KB
/
logseek
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
#!/usr/bin/perl
=head1 NAME

logseek - jump to a date/time within a large log file

=head1 SYNOPSIS

  $ logseek --help
  $ logseek -5 'yesterday 9am' /var/log/messages

=head1 DESCRIPTION

Uses a binary search to rapidly seek to a particular date/time within
a large log file.

=head1 BUGS

Doesn't support multiple files.  Supports only a limited set of
timestamp formats.

=head1 SEE ALSO

grep(1), tail(1)

=head1 AUTHOR

Adam Spiers <logseek@adamspiers.org>

=cut
use strict;
use warnings;
use Date::Manip qw(ParseDate Date_Cmp);
use Fcntl ':seek';
use Getopt::Long;
use Search::Binary;
# If we jump less than 512 bytes, switch to sequential searching.
use constant SEQ_SEARCH_THRESHOLD => 512;
###############################################################################
# Parse options
# Option defaults.  verbose and strict are pre-seeded because they are
# tested numerically/boolean-wise throughout the script under "use warnings".
my %opts = ( verbose => 0, strict => 0 );

# We rely on there being a timestamp at least every 500 lines.
my $unparsable_line_threshold = 500;

Getopt::Long::Configure("bundling");

# Convert -5 into -C5.  Only the first such argument is rewritten, and
# nothing after a literal "--" is touched.
foreach my $i (0 .. $#ARGV) {
    $ARGV[$i] eq '--' and last;
    $ARGV[$i] =~ s/^-(\d+)$/-C$1/ and last;
}

GetOptions(
    \%opts,
    'before|B=s', 'context|C=i', 'position|p', 'strict|s', 'verbose|v+',
    'threshold|t=i',
    # Registered so that the $opts{help} test below can actually fire;
    # previously --help only reached usage() via the "unknown option" path.
    'help|h',
) or usage();

usage() if $opts{help};

$unparsable_line_threshold = $opts{threshold} if $opts{threshold};

if ($opts{before}) {
    # Suffixes must agree with jump_back() and the usage text: b/k/M/G.
    # A lowercase "m" used to be the only megabyte spelling accepted here,
    # so it is still allowed, but normalised to the "M" jump_back() expects.
    usage("Invalid NUM '$opts{before}' for -B/--before")
        unless $opts{before} =~ /^\d+[bkmMG]?$/;
    $opts{before} =~ s/m$/M/;
}

usage("Can't have both --before and --context")
    if $opts{before} and $opts{context};

usage() if @ARGV != 2;

my ($human_date, $log) = @ARGV;

# $date is the parsed form of the user's DATE_EXPR; it is the search target.
my $date = ParseDate($human_date)
    or usage("Didn't understand date '$human_date'; aborting.\n");

# Three-argument open: the file name can never be interpreted as a mode
# or a pipe, whatever characters it contains.
my $fh;
open($fh, '<', $log) or usage("open($log) failed: $!\n");
###############################################################################
# Main program
my $min = 0;

# -s returns an empty string for a zero-length file; normalise that to 0
# so the value is safe to use in arithmetic and as a search boundary.
my $size = (-s $fh) || 0;

# $pos is the byte offset of the record binary_search() settled on.
my $pos = binary_search(
    $min,                 # minimum search boundary
    $size,                # maximum search boundary
    $date,                # value we're looking for
    \&log_read,           # callback from Search::Binary to find it
    $fh,
    SEQ_SEARCH_THRESHOLD, # switch to sequential searching if we're this close
);

my $before = $opts{before} || $opts{context};
if ($before) {
    # jump_back() works relative to the handle's current position (just
    # past the record log_read() consumed last), so leave the handle alone.
    jump_back($fh, $before);
}
elsif (defined $pos) {
    # log_read() consumes the record it compares, so the handle is now
    # positioned *after* the matched line.  Rewind to the start of that
    # record; otherwise the output would skip the very line we searched
    # for and --position would report the end of it.
    seek($fh, $pos, SEEK_SET)
        or die "seek($log, $pos) failed: $!\n";
}

output_result($fh);
exit 0;
#################################################################################
# Read callback handed to Search::Binary's binary_search().
#
# Arguments:
#   $fh  - the log file handle
#   $val - the value being searched for (unused: the file-level $date,
#          which is what the caller passes as the target, is compared)
#   $pos - byte offset to read the first whole record at/after, or undef
#          to read the next record sequentially from the current position
#
# Returns a two-element list: (comparison, offset), where offset is the
# byte position of the start of the record that was examined and
# comparison is Date_Cmp($date, <record's date>), or 0 at end of file.
#
# Lines with no parsable timestamp are skipped; the sub dies once
# $unparsable_line_threshold such lines are skipped within one call.
sub log_read {
    my ($fh, $val, $pos) = @_;

    if (defined $pos) {
        if ($pos > 0) {
            # Read 1st whole record starting at/after $pos: jump to just
            # before $pos and throw away the remainder of that line.
            seek($fh, $pos - 1, SEEK_SET);
            my $discard_this = <$fh>;
        }
        else {
            # Offset 0 is always the start of a record.  It must still be
            # seeked to explicitly: a defined-but-zero $pos is a request
            # for the first record, not for "the next record from wherever
            # the handle currently is".
            seek($fh, 0, SEEK_SET);
        }
    }

    my $unparsable_lines = 0;

    while (1) {
        my $newpos = tell($fh);
        my $line   = <$fh>;

        unless (defined $line) {
            # Hit EOF: no record is left to compare against, so report a
            # comparison of 0 at the EOF offset.
            # NOTE(review): an earlier comment here argued the comparison
            # ought to be positive at EOF, yet 0 has always been returned;
            # confirm against Search::Binary's documented EOF convention.
            warn "* Hit EOF while searching for $human_date\n" if $opts{verbose};
            return (0, $newpos);
        }

        chomp $line;
        warn "* Read: $line\n" if $opts{verbose} > 1;

        my $rpos = readable_pos($newpos, $size);
        my ($line_human_date, $line_date) = parse_date_from_line($line, $rpos);

        if ($line_date) {
            # Now let Search::Binary do the hard work.
            # Search::Binary asks for the first arg to be positive iff the
            # value we're looking for ($date) is strictly greater than the
            # current record's, 0 if equal, -1 if strictly less.
            warn "* Jumped to: $line_human_date ($newpos bytes)\n" if $opts{verbose};
            return (Date_Cmp($date, $line_date), $newpos);
        }

        # Not strict (parse_date_from_line would have died otherwise):
        # keep chewing lines till we find a date.
        warn "* Ignoring line $rpos: $line\n" if $opts{verbose} > 1;
        die "Hit threshold of " . $unparsable_line_threshold .
            " lines without parsable timestamp; aborting.\n"
            if ++$unparsable_lines >= $unparsable_line_threshold;
    }
}
# Extract and parse the timestamp of one log line.
#
# Arguments:
#   $line - the log line (already chomped by the caller)
#   $rpos - human-readable description of the line's position, used only
#           in diagnostics
#
# Returns ($line_human_date, $line_date): the raw timestamp text and the
# value ParseDate() produced from it.  Returns an empty list when no
# timestamp could be extracted or parsed; in --strict mode it dies instead.
sub parse_date_from_line {
    my ($line, $rpos) = @_;

    my $line_human_date = extract_date_from_line($line, $rpos);
    if (! $line_human_date) {
        # Report the position of *this* line.  The file-level $pos that
        # used to be interpolated here is not assigned until the whole
        # search has finished, so it was always undefined at this point.
        die "Couldn't extract date from line $rpos\n$line\n"
            if $opts{strict};
        return;
    }

    warn "* Extracted date '$line_human_date' from line $rpos\n"
        if $opts{verbose} > 2;

    my $line_date = ParseDate($line_human_date);
    if (! $line_date) {
        die "Couldn't parse date $line_human_date on line $rpos\n"
            if $opts{strict};
        return;
    }

    return ($line_human_date, $line_date);
}
# Pull the raw timestamp text out of a log line.
#
# The line is tried against each known log format in turn; the first
# format that matches wins and its captured timestamp is returned.
# Returns undef when no format matches.  $rpos is accepted for symmetry
# with the caller but is not used.
sub extract_date_from_line {
    my ($line, $rpos) = @_;

    # FIXME: make pattern "sticky"
    # Order matters: earlier entries take precedence over later ones.
    my @timestamp_patterns = (
        # syslog, Apache error log
        qr/^\[?([\w\s:]{10}[\d\s:]*)/,
        # Apache access log
        qr/^(?:\S+| ) \S+ \S+ \[(.+?)\]/,
        # twiki log*.txt
        qr/^\| (.\d \w\w\w \d{4} - .\d:\d\d)/,
        # conserver log
        qr/^\[(\w\w\w \w\w\w .\d \d\d:\d\d:\d\d \d\d\d\d)\]/,
        # xend log
        qr/^\[(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d) \d\d\d\d\]/,
        # pk_backend_zypp (and probably others)
        qr/^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d) /,
        # TODO: Add more patterns here.
    );

    foreach my $pattern (@timestamp_patterns) {
        if ($line =~ $pattern) {
            return $1;
        }
    }

    return undef;
}
# Move $fh backwards from its current position to provide leading context.
#
# Arguments:
#   $fh       - the log file handle, positioned where the search finished
#   $how_much - either a byte amount with a unit suffix (b, k, M or G),
#               or a plain number of lines
#
# Byte mode seeks back that many bytes and then skips forward to the next
# line boundary.  Line mode needs File::ReadBackwards and dies if the
# module is not installed.  Both modes stop at the start of the file.
sub jump_back {
    my ($fh, $how_much) = @_;

    if ($how_much =~ /^(\d+)(b|k|M|G)$/) {
        my %bytes_per_unit = (
            b => 1,
            k => 2**10,
            M => 2**20,
            G => 2**30,
        );
        my $bytes  = $1 * $bytes_per_unit{$2};
        my $target = tell($fh) - $bytes - 1;

        if ($target < 0) {
            # Asked to go back further than the file extends.  A seek to
            # a negative offset fails, and the discard read below would
            # then have moved the handle *forwards* by a line.  Offset 0
            # is already a line boundary, so nothing needs discarding.
            seek($fh, 0, SEEK_SET);
            warn "* Jumped back to start of file\n" if $opts{verbose};
        }
        else {
            seek($fh, $target, SEEK_SET);
            warn "* Jumped back $bytes bytes\n" if $opts{verbose};
            # Drop the (probably partial) line we landed in.
            my $discard_this = <$fh>;
        }
        return;
    }

    eval {
        require File::ReadBackwards;
    };
    if ($@) {
        die "You need the File::ReadBackwards Perl module installed to do this.\n";
    }

    my $bw = File::ReadBackwards->new( $log )
        or die "File::ReadBackwards could not open $log: $!\n";

    my $pos = tell($fh);
    warn "* Starting to read backwards from $pos bytes\n" if $opts{verbose};
    seek($bw->get_handle, $pos, SEEK_SET);

    # FIXME: VERY NAUGHTY! fiddle with File::ReadBackwards object
    # internals because it doesn't natively support starting from
    # anywhere but the end of the file.  Doh!
    $bw->{seek_pos} = $pos;

    # Get size of first block to read; either a trailing partial one
    # (the % size) or full sized one (max read size).
    # Max is 8k, hardcoded in File::ReadBackwards as a lexical :-(
    my $max_read_size = 1 << 13;
    $bw->{read_size} = $pos % $max_read_size || $max_read_size;

    # First read will get the line we're already on.
    $bw->readline;

    for (my $i = 0; $i < $how_much; $i++) {
        my $line = $bw->readline;

        # Stop only when there is nothing left to read.  Testing the
        # line's truth instead would also stop at any blank (or "0")
        # line, cutting the context short.
        last unless defined $line;

        chomp $line;
        warn "* Read previous line: $line\n" if $opts{verbose} > 1;
    }

    seek($fh, $bw->tell, SEEK_SET);
}
# Format a byte offset for diagnostics, e.g. "@ 50 bytes (25.0%)".
#
# Arguments:
#   $pos  - byte offset within the file
#   $size - total size of the file in bytes
#
# Returns the formatted string.  A false $size (0, or the empty string
# that -s yields for a zero-length file) is reported as 0.0% rather than
# dying with a division by zero.
sub readable_pos {
    my ($pos, $size) = @_;

    my $percent = $size ? $pos / $size * 100 : 0;

    return sprintf '@ %d bytes (%.1f%%)', $pos, $percent;
}
# Emit the final result, starting from $fh's current position.
#
# With --position: print the current byte offset and how far through the
# file that is, as a percentage of the file-level $size.
# With --context=N: print at most 2*N lines.
# Otherwise: print everything up to end of file.
sub output_result {
    my ($fh) = @_;

    if ($opts{position}) {
        my $pos = tell $fh;
        # An empty file has a false $size; avoid dividing by it.
        my $percent = $size ? $pos / $size * 100 : 0;
        printf "%d %.1f%%\n", $pos, $percent;
        return;
    }

    if ($opts{context}) {
        my $max_lines = $opts{context} * 2;
        my $printed   = 0;
        # A named lexical is used instead of $_ so the caller's $_ is
        # left alone.
        while (my $line = <$fh>) {
            print $line;
            last if ++$printed >= $max_lines;
        }
        return;
    }

    while (my $line = <$fh>) {
        print $line;
    }
    return;
}
# Print an optional complaint followed by the usage summary, then abort.
#
# Any arguments are emitted first as a single warning, with a newline
# appended.  The usage text itself is delivered via die, so this sub
# never returns.
sub usage {
    my @complaint = @_;

    if (@complaint) {
        warn @complaint, "\n";
    }

    my $usage_text = <<EOUSAGE;
Usage: $0 [options] DATE_EXPR LOGFILE
Using binary search to cope with huge files, outputs log file starting
at first line with a linestamp matching DATE_EXPR. Currently syslog,
Apache, twiki, and conserver linestamp formats are supported, but more
could easily be added.
DATE_EXPR can be things like:
May 1
yesterday 12am
2 weeks ago
2004/06/22
Run 'perldoc Date::Manip' and read the ParseDate section for more information.
Options:
-B, --before=NUM Print NUM lines of leading context before target line,
or NUM bytes/kB/MB/GB if NUM is suffixed with b/k/M/G
(N.B. there is no -A or --after, use head(1) instead.)
-C, --context=NUM Print NUM lines of context
-NUM Same as --context=NUM
-p, --position Output start position in bytes instead of the file itself
-s, --strict Abort if a non-date line is found
-t, --threshold=NUM Assume a timestamp at least every NUM lines
-v, --verbose See binary search in action (repeat to increase verbosity)
EOUSAGE

    die $usage_text;
}