-
Notifications
You must be signed in to change notification settings - Fork 2
/
extract_fields
executable file
·68 lines (54 loc) · 1.56 KB
/
extract_fields
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env perl
# extract_fields: read a CSV file that starts with a header row and, for every
# record, write one "<id>,<value>" CSV row per separator-delimited value found
# in the chosen fields column.
use strict;
# "use warnings" replaces the former "-w" on the shebang line: on Linux the
# kernel hands "perl -w" to env as one single argument, so the script could not
# be started directly.
use warnings;
use Getopt::Long;
use FileHandle;
use Text::CSV;

# Separator between the values inside the fields column. It is interpolated
# into the split pattern in main(), so it is interpreted as a regex fragment.
my $FIELDS_SEPARATOR = ' ';

# Input / output file names; '-' selects STDIN / STDOUT respectively.
my ($INFILE, $OUTFILE) = ('-', '-');

my $USAGE = "Usage: $0 [--field-separator=<sep>] [--input-file=<file>] "
          . "[--output-file=<file>] <id column name> <fields column name>\n";

# GetOptions returns false on unknown or malformed options; stop instead of
# silently continuing with defaults.
GetOptions(
    'field-separator=s' => \$FIELDS_SEPARATOR,
    'input-file=s'      => \$INFILE,
    'output-file=s'     => \$OUTFILE,
) or die $USAGE;

# Remaining positional arguments: the name of the id column and the name of
# the column holding the separated values.
my ($ID, $TAGS) = @ARGV;
die $USAGE unless defined $TAGS && length $TAGS;

my %CSV_OPTS = (
    'binary'      => 1,
    'quote_char'  => '"',
    'escape_char' => '"',
);

# A failed constructor is reported through the class-level error_diag; the ""
# concatenation forces the string form instead of the (code, message, ...) list.
my $CSV = Text::CSV->new(\%CSV_OPTS)
    or die "Cannot create CSV parser: " . Text::CSV->error_diag . "\n";
# Converts the input CSV into "<id>,<value>" rows.
#
# Reads the header row to learn the column names, writes an output header made
# of the two requested column names, then for every record splits the fields
# column on runs of $FIELDS_SEPARATOR and writes one row per value. A record
# with no value in that column gets a single "N/A" row.
#
# Uses the file-level $INFILE, $OUTFILE, $ID, $TAGS, $FIELDS_SEPARATOR and $CSV.
# Dies when a file cannot be opened or closed, when the header cannot be read,
# or when a requested column is missing from the header. Records that fail to
# parse are reported with warn and skipped.
sub main() {
    # An empty name or '-' selects the standard stream.
    my $fh_in = (!$INFILE || $INFILE eq '-')
        ? *STDIN
        : FileHandle->new($INFILE, 'r');
    die "Cannot open input '$INFILE': $!" unless $fh_in;

    my $fh_out = (!$OUTFILE || $OUTFILE eq '-')
        ? *STDOUT
        : FileHandle->new($OUTFILE, 'w');
    die "Cannot open output '$OUTFILE': $!" unless $fh_out;

    # Parse errors live on the parser instance; the class-method form of
    # error_diag only reports failures of Text::CSV->new.
    my $header = $CSV->getline($fh_in)
        or die "Cannot read CSV header from '$INFILE': " . $CSV->error_diag . "\n";
    $CSV->column_names(@$header);

    # Fail early on a mistyped column name instead of emitting rows with
    # empty ids or nothing but "N/A" values.
    my %is_column = map { ($_ => 1) } @$header;
    for my $wanted ($ID, $TAGS) {
        die "Column '$wanted' not found in CSV header\n"
            unless $is_column{$wanted};
    }

    out($fh_out, $ID, $TAGS);

    # Group the separator so that "+" repeats the whole separator and not
    # only its last character; compiled once, outside the record loop.
    my $separator_re = qr/(?:$FIELDS_SEPARATOR)+/;

    until ($CSV->eof) {
        my $hr = $CSV->getline_hr($fh_in) or do {
            last if $CSV->eof;
            warn "Skipping unparsable record: " . $CSV->error_diag . "\n";
            next;
        };

        # Test for defined/non-empty rather than truth so that a literal "0"
        # is kept as a value. Empty pieces (e.g. from a leading separator)
        # are dropped.
        my $raw  = $hr->{$TAGS};
        my @tags = (defined $raw && length $raw)
            ? grep { length } split($separator_re, $raw)
            : ();
        @tags = ('N/A') unless @tags;

        foreach my $t (@tags) {
            out($fh_out, $hr->{$ID}, $t);
        }
    }

    # Buffered write errors only surface when the handle is closed.
    close($fh_out) or die "Cannot close output '$OUTFILE': $!";
}
# Writes one CSV record to a file handle.
#
# Arguments: the output handle, followed by the field values of the record.
# The fields are encoded with the file-level $CSV object and terminated with
# a newline. Dies when the record cannot be encoded or written.
#
# No prototype is declared: every caller appears before this definition, so a
# prototype would never be applied and would only produce "called too early
# to check prototype" warnings.
sub out {
    my ($fh, @fields) = @_;

    $CSV->combine(@fields)
        or die "Cannot encode CSV record: " . $CSV->error_diag . "\n";

    print {$fh} $CSV->string . "\n"
        or die "Cannot write CSV record: $!\n";
}
# Script entry point: run main() now that the subs above have been defined.
main();