-
Notifications
You must be signed in to change notification settings - Fork 13
/
zipseqs
executable file
·105 lines (86 loc) · 2.22 KB
/
zipseqs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
# Command-line switches, stored by Getopt::Long under their long names:
#   prefixes (-p)  boolean flag
#   format   (-f)  takes a string argument
#   eof      (-e)  boolean flag
# If GetOptions reports a parsing failure, hand over to usage().
my %opts;
my @switch_specs = ('prefixes|p', 'format|f=s', 'eof|e');
usage() unless GetOptions(\%opts, @switch_specs);
# usage()
#
# Emit the command-line help text via die, so the message goes to STDERR
# and the program terminates unless the caller traps the exception.
# Takes no arguments and never returns normally.
#
# The here-document is an interpolating one so that $0 expands to the
# name the script was invoked as.  Because of that, the backslashes in
# the -f example must be doubled: otherwise "\n" would be turned into a
# real newline and the example format string would be shown broken
# across several lines instead of as the literal text the user types.
sub usage {
    die <<EOF;
Usage: $0 file1 file2 [...]
$0 [ -p | --prefixes ] prefix1 file1 prefix2 file2 [...]
$0 [ -f | --format=STR ] file1 file2 [...]
-e, --eof Abort as soon as EOF is reached on any file.
Take two or more sequences and 'zip' them together in the functional
programming sense. Defaults to one item per line. -p prefixes each
item with a specified prefix. -f allows an arbitrary output format,
with each '%s' substituted for an item in the zipped sequence, e.g.
the following are almost equivalent:
$0 -p prefix1 file1 prefix2 file2
$0 -f 'prefix1 %s\\nprefix2 %s\\n' file1 file2
except that the former would stop intelligently if file2 ran out of
items before file1 - there's no obvious general way of trimming a format
string intelligently when one list runs out before another.
EOF
}
# ----------------------------------------------------------------------
# Main program.
#
# 1. Work out the list of input files (and, under -p, their prefixes).
# 2. Prepare the -f format string, if one was given.
# 3. Open every input.
# 4. Emit rows: each row takes the next item (line) from every input in
#    command-line order, until all inputs are exhausted (or, with -e,
#    until the first input runs out).
# ----------------------------------------------------------------------
usage() unless @ARGV;

# @files keeps the inputs in command-line order; %prefixes maps a file
# name to the prefix supplied for it under -p.
my (@files, %prefixes);
if ($opts{prefixes}) {
    # With -p the arguments must come in (prefix, file) pairs.
    usage() unless @ARGV % 2 == 0;
    while (@ARGV) {
        my $prefix = shift;
        my $file   = shift;
        push @files, $file;
        $prefixes{$file} = $prefix;
    }
}
else {
    @files = @ARGV;
}

# Decide once whether format mode is active, so that a format string
# which happens to be false in boolean context (e.g. "0") is still used.
my $use_format = defined $opts{format};

# The literal pieces of the format string that surround each '%s'.
my @format_parts;
if ($use_format) {
    # Expand backslash escapes typed on the command line (\n, \t, \x41,
    # \012, ...).  NOTE: this uses a string eval (/ee), but only on text
    # matched by the tightly constrained pattern below, so arbitrary
    # user-supplied code never reaches the eval.
    $opts{format}
        =~ s/(\\([abefnrt]|x[0-9a-fA-F]{2}|0\d{2,3}))/qq["$1"]/gee;

    # Split once, up front.  The -1 limit keeps trailing empty pieces so
    # a format ending in '%s' still has a placeholder there.
    @format_parts = split /%s/, $opts{format}, -1;
}

# Open every input read-only.  The three-argument form stops characters
# such as '>' or '|' in a file name from being interpreted as an open
# mode; '-' is still accepted as a name for standard input.
my %fh;
foreach my $file (@files) {
    if ($file eq '-') {
        $fh{$file} = \*STDIN;
        next;
    }
    open($fh{$file}, '<', $file) or die "open($file) failed: $!\n";
}

# Files that have reached end-of-file; maintained by get_line().
my %eof;

ROW:
while (1) {
    # In format mode, $items[$i] holds this row's item from $files[$i];
    # it stays undefined when that file has no more items.
    my @items;

    foreach my $i (0 .. $#files) {
        my $file = $files[$i];
        my $line = get_line($file);
        unless (defined $line) {
            last ROW if $opts{eof};
            next;
        }
        if ($use_format) {
            $items[$i] = $line;
        }
        else {
            print $prefixes{$file}, ' ' if $opts{prefixes};
            # Print every defined item, including "0" and the empty
            # string, so no item is lost and a prefix is never left
            # without its terminating newline.
            print "$line\n";
        }
    }

    # Stop once every input is exhausted.  @files itself is left intact
    # so that each file keeps its own placeholder position.
    last unless grep { !$eof{$_} } @files;

    if ($use_format) {
        # Interleave the literal pieces with the items.  Building the
        # string this way never rescans inserted text, so an item that
        # itself contains '%s' cannot capture a later item.  A
        # placeholder with no item (exhausted file, or more placeholders
        # than files) is rendered as the literal text 'EOF'.
        my $out = '';
        foreach my $slot (0 .. $#format_parts) {
            $out .= $format_parts[$slot];
            next if $slot == $#format_parts;    # nothing follows the last piece
            $out .= defined $items[$slot] ? $items[$slot] : 'EOF';
        }
        print $out;
    }
}
# get_line($file)
#
# Fetch the next line from the handle stored in %fh under $file and
# return it with the trailing newline removed.  Returns undef when there
# is nothing left to read; the first time that happens the file is
# recorded in %eof, and every later call for the same name returns undef
# straight away without reading from the handle.
sub get_line {
    my ($name) = @_;

    # Already known to be exhausted: do not read again.
    if ($eof{$name}) {
        return undef;
    }

    my $handle = $fh{$name};
    my $text   = <$handle>;

    unless (defined $text) {
        # The read produced nothing: remember that for subsequent calls.
        $eof{$name}++;
        return $text;
    }

    chomp $text;
    return $text;
}