-
Notifications
You must be signed in to change notification settings - Fork 2
/
final_cleanup.pl
executable file
·44 lines (38 loc) · 1.25 KB
/
final_cleanup.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!usr/bin/perl -w
use strict;
die "Usage: perl $0 [.aln] [minimum coverage]\n" unless (@ARGV == 2);
open (FA, $ARGV[0]) or die "$ARGV[0] $!\n";
open OUT, ">$ARGV[0].cleanup" or die "$ARGV[0].cleanup $!\n";
open LST, ">$ARGV[0].cleanup.list" or die "$ARGV[0].cleanup.list $!\n";
open LOG, ">$ARGV[0].cleanup.log" or die "$ARGV[0].cleanup.log $!\n";
$/=">";
my $null = <FA>;
my $read = <FA>;
my @read = split /\n/, $read;
shift @read;
$read = join "", @read;
my $ref_len = length $read; # Calculate the total length of the alignment
close FA;
open (FA, $ARGV[0]) or die "$ARGV[0] $!\n";
$null = <FA>;
while(<FA>){
chomp;
my @raw = split /\n/;
my $name = shift @raw;
my $seq = join "", @raw;
my $base = $seq;
$base =~ s/\-//g;
my $len = length $base;
my $rate = $len/$ref_len; # Calculate the percentage of non-gap basepairs
if($rate >= $ARGV[1]){ # Keep the sequence if the percentage is higher than the threshold
print OUT ">$name\n$seq\n";
print LST "$name\n";
print LOG "$name\t$rate\n";
}else{
print LOG "$name\t$rate\t*\n";
print "\t$ARGV[0]\t$name\t$rate\t*\n";
}
}
close FA;
close OUT;
close LST;