forked from weizhongli/cdhit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
clstr2txt.pl
executable file
·57 lines (53 loc) · 1.18 KB
/
clstr2txt.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/perl
# Convert a CD-HIT style .clstr listing (files named on the command line, or
# STDIN) into a table with one row per cluster member, written to STDOUT.
use strict;
use warnings;

# State shared with process_this_cluster(), which reads these file-scoped
# lexicals directly instead of taking arguments.
my $no = 0;               # number of members collected for the current cluster
my $clstr_no = "";        # number captured from the latest ">Cluster N" header
my @this_cluster = ();    # one [id, length, is_rep, identity] record per member

print "id clstr clstr_size length clstr_rep clstr_iden clstr_cov\n";

while (my $line = <>) {
    if ($line =~ /^>/) {
        # Any header line closes the cluster collected so far.
        process_this_cluster() if $no > 0;

        # Headers that are not of the form ">Cluster N" keep the previous number.
        if ($line =~ /^>Cluster (\d+)/) {
            $clstr_no = $1;
        }
        $no = 0;
        @this_cluster = ();
        next;
    }

    # Strip trailing whitespace (newline, and a CR from DOS-style files) so the
    # end-anchored identity pattern below can match; ignore blank lines.
    $line =~ s/\s+$//;
    next if $line eq '';

    my ($id, $len, $rep, $iden);
    if ($line =~ /\d+\t(\d+)[a-z]{2}, >(.+)\.\.\. \*/) {
        # Member line ending in "... *": the cluster representative.
        ($len, $id) = ($1, $2);
        $rep  = 1;
        $iden = 100;
    }
    elsif ($line =~ /\d+\t(\d+)[a-z]{2}, >(.+)\.\.\./) {
        # Any other member line; its identity is the percentage that ends the line.
        ($len, $id) = ($1, $2);
        $rep = 0;

        # Only read $1 when this match succeeds: after a failed match $1 would
        # still hold the length captured above and be reported as the identity.
        if ($line =~ /(\d+%|\d+\.\d+%)$/) {
            $iden = $1;
        }
        else {
            warn "clstr2txt: no identity found on input line $.: $line\n";
            $iden = 'NA';
        }
    }
    else {
        # Report and skip the line instead of recording an empty member, which
        # would otherwise be printed as a blank row and counted in the size.
        warn "clstr2txt: skipping unparsable input line $.: $line\n";
        next;
    }

    push @this_cluster, [$id, $len, $rep, $iden];
    $no++;
}

# Flush the last cluster, which has no following header to trigger it.
process_this_cluster() if $no > 0;
# Print one tab-separated row for every member of the current cluster.
#
# Takes no arguments and returns nothing useful; it reads the file-scoped
# variables filled in by the main loop:
#   @this_cluster - array refs of the form [id, length, is_rep, identity]
#   $clstr_no     - cluster number, printed in the second column
#   $no           - member count, printed in the third column
#
# Rows go to STDOUT, representative first, then by decreasing length. The last
# column is int(length / reference_length * 100) followed by "%", where the
# reference length is that of the representative.
sub process_this_cluster {
    my @members =
        sort { ($b->[2] <=> $a->[2]) or ($b->[1] <=> $a->[1]) } @this_cluster;

    # Reference length: the length of the member flagged as representative.
    my $ref_len = 0;
    foreach my $member (@members) {
        $ref_len = $member->[1] if $member->[2];
    }

    # A cluster without a usable representative (e.g. truncated input) would
    # otherwise abort the run with a division by zero. Fall back to the longest
    # member, which is first after the sort when no member is flagged.
    if (!$ref_len && @members) {
        warn "clstr2txt: cluster $clstr_no has no representative length; "
           . "coverage is relative to its longest member\n";
        $ref_len = $members[0]->[1] || 0;
    }

    foreach my $member (@members) {
        my $len = $member->[1] || 0;

        # Still no usable reference length: report 0 rather than dividing by zero.
        my $cov = $ref_len ? int($len / $ref_len * 100) : 0;

        print join("\t",
            map({ defined $_ ? $_ : '' }
                $member->[0], $clstr_no, $no,
                $member->[1], $member->[2], $member->[3]),
            "$cov%"), "\n";
    }
    return;
}