-
Notifications
You must be signed in to change notification settings - Fork 2
/
clusterProfillicAlignmentProfiles.pl
112 lines (91 loc) · 3.45 KB
/
clusterProfillicAlignmentProfiles.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/perl
##---------------------------------------------------------------------------##
## File:
## @(#) clusterProfillicAlignmentProfiles
## Author:
## Paul Thatcher Edlefsen pedlefse@fredhutch.org
## Description:
## Script for using Profile HMMs to cluster sequences.
## By default it writes the output to
## stdout; use -o to write to a file. Note that this calls runProfillic.pl
## to create a file of alignment profiles. This file is then broken into
## input-sequence-specfiic alignment profile files. And then this
## calls out to the R script clusterProfillicAlignmentProfiles.R.
##
###******************************************************************************
use Getopt::Std; # for getopts
use Text::Wrap; # for wrap
$Text::Wrap::columns = 72;# TODO: DEHACKIFY MAGIC #
use strict;
use vars qw( $opt_D $opt_V $opt_f );
use vars qw( $VERBOSE $DEBUG );
sub clusterProfillicAlignmentProfiles {
@ARGV = @_;
sub clusterProfillicAlignmentProfiles_usage {
print "\tclusterProfillicAlignmentProfiles [-DVf] <input_fasta_filename> <profillic_alignment_profile_files_list_file> [<output dir>]\n";
exit;
}
# This means -D, -V, and -f are ok, but nothin' else.
# opt_f means don't actually cluster them; instead put them all into one cluster ("cluster 0").
# opt_D means print debugging output.
# opt_V means be verbose.
# But first reset the opt vars.
( $opt_D, $opt_V, $opt_f ) = ();
if( not getopts('DVf') ) {
clusterProfillicAlignmentProfiles_usage();
}
$DEBUG ||= $opt_D;
$VERBOSE ||= $opt_V;
my $force_one_cluster = $opt_f;
my $old_autoflush;
if( $VERBOSE ) {
select STDOUT;
$old_autoflush = $|;
$| = 1; # Autoflush.
}
my $input_fasta_file = shift @ARGV || clusterProfillicAlignmentProfiles_usage();
my ( $input_fasta_file_path, $input_fasta_file_short ) =
( $input_fasta_file =~ /^(.*?)\/([^\/]+)$/ );
unless( $input_fasta_file_short ) {
$input_fasta_file_short = $input_fasta_file;
$input_fasta_file_path = ".";
}
my $alignment_profile_files_list_file = shift @ARGV || clusterProfillicAlignmentProfiles_usage();
my $output_dir = shift @ARGV || $input_fasta_file_path;
# Remove the trailing "/" if any
if( defined( $output_dir ) ) {
( $output_dir ) = ( $output_dir =~ /^(.*[^\/])\/*$/ );
}
if( $VERBOSE ) { print "Output will be written in directory \"$output_dir\".."; }
if( !-e $output_dir ) {
`mkdir $output_dir`;
}
if( $VERBOSE ) {
print "Calling R to cluster the alignment profiles..";
if( $force_one_cluster ) {
print( "Forcing one cluster..\n" );
} else {
print( "Clustering..\n" );
}
}
my $R_output = `export clusterProfillicAlignmentProfiles_forceOneCluster="$force_one_cluster"; export clusterProfillicAlignmentProfiles_alignmentProfileFilesListFilename="$alignment_profile_files_list_file"; export clusterProfillicAlignmentProfiles_fastaFilename="$input_fasta_file"; export clusterProfillicAlignmentProfiles_outputDir="$output_dir"; R -f clusterProfillicAlignmentProfiles.R --vanilla --slave`;
# The output has the number of clusters.
print( $R_output );
if( $VERBOSE ) {
print "\t.done.\n";
}
if( $VERBOSE ) {
select STDOUT;
$| = $old_autoflush;
}
if( $VERBOSE ) {
print ".Done.\n";
}
if( $VERBOSE ) {
select STDOUT;
$| = $old_autoflush;
}
return 0;
} # clusterProfillicAlignmentProfiles(..)
clusterProfillicAlignmentProfiles( @ARGV );
1;