-
Notifications
You must be signed in to change notification settings - Fork 2
/
getInSitesStat.pl
187 lines (149 loc) · 5.78 KB
/
getInSitesStat.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/perl
##---------------------------------------------------------------------------##
## File:
## @(#) getInSitesStat
## Author:
## Paul Thatcher Edlefsen pedlefse@fredhutch.org
## Description:
## Script for parsing output from the InformativeSites DiveIn
## tool (at the Mullins lab web site) to calculate the stat used
## to estimate the number of founders and perform sequence
## reconstruction of the founders. Takes two input files (first can be missing):
## getInSitesStat 123456789012.txt 123456789012_priv.txt
## By default it writes the output to stdout; use -o to write to a file.
## If the first file does not exist (no informative sites) use "-" for first argument.
##
###******************************************************************************
use Getopt::Std; # for getopts
use strict;
use vars qw( $opt_D $opt_V $opt_o $opt_O );
use vars qw( $VERBOSE $DEBUG );
sub getInSitesStat {
@ARGV = @_;
sub getInSitesStat_usage {
print "\tgetInSitesStat [-DV] [-(o|O) <Output File>] <jobnum.txt> <jobnum_priv.txt> [<Output File>]\n";
exit;
}
# This means -D, -o, -O, -V are ok, but nothin' else.
# opt_D means print debugging output.
# opt_o is an optional filename to put the output. Otherwise, STDOUT.
# opt_O is just like opt_o except it'll overwrite the file if it exists.
# opt_V means be verbose.
# But first reset the opt vars.
( $opt_D, $opt_V, $opt_o, $opt_O ) = ();
if( not getopts('DVo:O:') ) {
getInSitesStat_usage();
}
$DEBUG ||= $opt_D;
$VERBOSE ||= $opt_V;
my $old_autoflush;
if( $VERBOSE ) {
select STDOUT;
$old_autoflush = $|;
$| = 1; # Autoflush.
}
## Sometimes this file is missing, because there are no informative sites. We take "-" to indicate this condition.
my $informative_sites_file = shift @ARGV || getInSitesStat_usage();
my $private_sites_file = shift @ARGV || getInSitesStat_usage();
my $informative_no_gaps = 0; # default for when informative sites file is empty
if( ( $informative_sites_file eq '-' ) || ( !-e $informative_sites_file ) ) {
$informative_no_gaps = 0;
} else {
if( $VERBOSE ) { print "Opening file \"$informative_sites_file\".."; }
unless( open( INFORMATIVE_SITES_FILE_FH, $informative_sites_file ) ) {
warn "Unable to open informative_sites_file file \"$informative_sites_file\": $!";
return 1;
}
if( $VERBOSE ) { print ".done.\n"; }
if( $VERBOSE ) { print "Reading informative sites file.."; }
my ( $line );
while( $line = <INFORMATIVE_SITES_FILE_FH> ) {
( $line ) = ( $line =~ /^(.+?)\s*$/ ); # Chop and Chomp won't remove ^Ms
next unless( defined( $line ) && ( $line ne '' ) );
if( $VERBOSE ) { print "."; }
next unless( $line =~ /^Alignment/ );
( $informative_no_gaps ) = ( $line =~ /Alignment\s\d+\s(\d+)\s\d+/ );
last;
}
if( $VERBOSE ) { print ".done.\n"; }
if( $VERBOSE ) { print "Closing file \"$informative_sites_file\".."; }
close INFORMATIVE_SITES_FILE_FH;
if( $VERBOSE ) { print ".done.\n"; }
} # End if $informative_sites_file eq '-' .. else ..
my $private_no_gaps;
if( ( $private_sites_file eq '-' ) || ( !-e $private_sites_file ) ) {
$private_no_gaps = 0;
} else {
if( $VERBOSE ) { print "Opening file \"$private_sites_file\".."; }
unless( open( PRIVATE_SITES_FILE_FH, $private_sites_file ) ) {
warn "Unable to open private_sites_file file \"$private_sites_file\": $!";
return 1;
}
if( $VERBOSE ) { print ".done.\n"; }
if( $VERBOSE ) { print "Reading private sites file.."; }
my ( $line );
while( $line = <PRIVATE_SITES_FILE_FH> ) {
( $line ) = ( $line =~ /^(.+?)\s*$/ ); # Chop and Chomp won't remove ^Ms
next unless( defined( $line ) && ( $line ne '' ) );
if( $VERBOSE ) { print "."; }
next unless $line =~ /^Alignment/;
( $private_no_gaps ) = ( $line =~ /Alignment\s\d+\s(\d+)\s\d+/ );
last;
}
if( $VERBOSE ) { print ".done.\n"; }
if( $VERBOSE ) { print "Closing file \"$private_sites_file\".."; }
close PRIVATE_SITES_FILE_FH;
if( $VERBOSE ) { print ".done.\n"; }
}
if( $VERBOSE ) {
print "\$informative_no_gaps is $informative_no_gaps\n";
print "\$private_no_gaps is $private_no_gaps\n";
}
# Named for Morgane Rolland, whose advice lead us to consider this statistic.
my $morganes_stat;
if( $private_no_gaps == 0 ) {
if( $informative_no_gaps == 0 ) {
$morganes_stat = 0;
} else {
$morganes_stat = $informative_no_gaps; # pretend that $private_no_gaps == 1.
}
} else {
$morganes_stat = $informative_no_gaps / $private_no_gaps;
}
if( $VERBOSE ) {
print "\$morganes_stat is $morganes_stat\n";
}
my $output_file = $opt_o || $opt_O || shift @ARGV;
if( $output_file ) {
if( !$opt_O && -e $output_file ) {
print "The file \"$output_file\" exists. Use -O to overwrite.\n";
exit;
}
if( $VERBOSE ) { print "Opening file \"$output_file\" for writing.."; }
unless( open OUTPUT_FH, ">$output_file" ) {
warn "Unable to open output file \"$output_file\": $!";
return 1;
}
if( $VERBOSE ) { print ".done.\n"; }
} else {
unless( open OUTPUT_FH, ">&STDOUT" ) {
warn "Unable to rename STDOUT: $!";
return 1;
}
}
if( $VERBOSE ) {
print "Printing output..";
}
print OUTPUT_FH "$informative_no_gaps / $private_no_gaps = $morganes_stat\n";
if( $VERBOSE ) { print ".done.\n"; }
if( $VERBOSE && $output_file ) { print "Closing file \"$output_file\".."; }
close OUTPUT_FH;
if( $VERBOSE && $output_file ) { print ".done.\n"; }
if( $VERBOSE ) {
select STDOUT;
$| = $old_autoflush;
}
return 0;
} # getInSitesStat(..)
getInSitesStat( @ARGV );
1;