forked from jiwoongbio/VAMPr
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathVAMP_fisher.filter.pl
79 lines (72 loc) · 2.2 KB
/
VAMP_fisher.filter.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Author: Jiwoong Kim (jiwoongbio@gmail.com)
use strict;
use warnings;
local $SIG{__WARN__} = sub { die $_[0] };
use Getopt::Long qw(:config no_ignore_case);
# Command-line options and their defaults.
#   -h        show the usage text and stop
#   -a FLOAT  p-value cutoff (alpha), 0.05 unless given
#   -D        do not require a true 'driveCount' value for a row to be considered
#   -O        do not select rows on the odds-ratio criterion
#   -m        also pull in mutants of the selected clusters
my $help = '';
my $alpha = 0.05;
my $noDriveCount = '';
my $noOddsratio = '';
my $includeMutants = '';
GetOptions(
	'h'   => \$help,
	'a=f' => \$alpha,
	'D'   => \$noDriveCount,
	'O'   => \$noOddsratio,
	'm'   => \$includeMutants,
);

# Without an input file (or with -h) there is nothing to do: emit the usage
# text through die, so it goes to STDERR and the exit status is non-zero.
if($help || !@ARGV) {
	my $usage = <<'EOF';
Usage: perl VAMP_fisher.filter.pl VAMP_fisher.txt > VAMP_fisher.filter.txt
Options: -h display this help message
-a FLOAT alpha, p-value cutoff
-D do not consider drive count
-O do not consider odds ratio
-m include mutants of selected clusters
EOF
	die $usage;
}
# Read the Fisher test table named by the first positional argument.
#
# The table is tab-separated. A line starting with '#' is the header: the '#'
# is stripped, the remaining fields become the column names, and the header is
# echoed to STDOUT unchanged. Every other line is turned into a hash keyed by
# column name and appended to @tokenHashList (selected or not, because the
# optional mutant pass later needs the unselected rows too).
#
# A row is marked 'selected' when
#   * its 'driveCount' is true, or -D was given, AND
#   * its 'pvalue' is <= alpha, or (unless -O was given) its 'oddsratio' is
#     the string "Inf" or numerically zero.
my ($fisherFile) = @ARGV;
my $reader;
if($fisherFile eq '-') {
	# The former two-argument open treated "-" as standard input; keep that working.
	$reader = \*STDIN;
} else {
	# Three-argument open: the name is never interpreted as a mode or a pipe,
	# and a failure is reported here instead of surfacing later as a
	# "readline() on closed filehandle" warning.
	open($reader, '<', $fisherFile) or die "Can't open '$fisherFile': $!\n";
}
my @columnList = ();
my @tokenHashList = ();
while(my $line = <$reader>) {
	chomp($line);
	if($line =~ s/^#//) {
		@columnList = split(/\t/, $line);
		print '#', join("\t", @columnList), "\n";
	} else {
		my %tokenHash = ();
		# Limit -1 keeps trailing empty fields. Without it a row whose last
		# columns are empty would get undef values, and because warnings are
		# fatal in this script it would die as soon as it is printed or its
		# phenotype is compared.
		@tokenHash{@columnList} = split(/\t/, $line, -1);
		if($noDriveCount || $tokenHash{'driveCount'}) {
			if($tokenHash{'pvalue'} <= $alpha) {
				$tokenHash{'selected'} = 1;
			} elsif($noOddsratio eq '' && ($tokenHash{'oddsratio'} eq "Inf" || $tokenHash{'oddsratio'} == 0)) {
				# "Inf" is tested as a string first so the numeric comparison
				# is only reached for other values.
				$tokenHash{'selected'} = 1;
			}
		}
		push(@tokenHashList, \%tokenHash);
	}
}
close($reader);
# Optional second pass (-m): add mutants of the clusters that were selected.
#
# A genotype entry is either a bare cluster name or "cluster|...". Anything
# from the first '|' onward is stripped to obtain the cluster name, so an entry
# that is unchanged by the stripping is a bare cluster.
if($includeMutants) {
	# Bare cluster name => phenotype of the selected row that lists it
	# (a later selected row overwrites an earlier one for the same cluster).
	my %phenotypeOfCluster = ();
	foreach my $selectedRow (grep {$_->{'selected'}} @tokenHashList) {
		foreach my $genotype (split(/,/, $selectedRow->{'genotypes'})) {
			my $cluster = $genotype;
			$cluster =~ s/\|.*$//;
			next if($cluster ne $genotype);
			$phenotypeOfCluster{$cluster} = $selectedRow->{'phenotype'};
		}
	}

	# Visit the rows that were not selected in the first pass. The list is
	# built before the loop starts, so marking rows inside it does not change
	# which rows are visited.
	foreach my $candidateRow (grep {!$_->{'selected'}} @tokenHashList) {
		my @mutantList = ();
		foreach my $genotype (split(/,/, $candidateRow->{'genotypes'})) {
			my $cluster = $genotype;
			$cluster =~ s/\|.*$//;
			# Only "cluster|..." entries are of interest here.
			next if($cluster eq $genotype);
			my $clusterPhenotype = $phenotypeOfCluster{$cluster};
			next unless(defined($clusterPhenotype));
			# Keep the mutant only when this row has a non-empty phenotype
			# that differs from the one recorded for its cluster.
			next unless($candidateRow->{'phenotype'} ne $clusterPhenotype && $candidateRow->{'phenotype'} ne '');
			push(@mutantList, $genotype);
		}
		next unless(@mutantList);
		# Report only the qualifying mutants for this row.
		$candidateRow->{'genotypes'} = join(',', @mutantList);
		$candidateRow->{'selected'} = 1;
	}
}
# Write every selected row to STDOUT, tab-separated, with the fields in the
# same order as the header columns and the rows in input order.
foreach my $row (@tokenHashList) {
	next unless($row->{'selected'});
	print join("\t", @{$row}{@columnList}), "\n";
}