-
Notifications
You must be signed in to change notification settings - Fork 10
/
run-dipcall
executable file
·137 lines (118 loc) · 4.88 KB
/
run-dipcall
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env perl
use strict;
use warnings;
use Getopt::Std;
my $version = "0.3";
my %opts = (t=>8);
getopts("t:d:x:muz:W:a", \%opts);
die("Usage: run-dipcall [options] <prefix> <ref.fa> <pat.fa> <mat.fa>
Options:
-t INT number of threads [$opts{t}]
-d FILE unimap/minimap2 index for ref.fa []
-a call on all contigs regardless of naming
-x FILE PAR on chrX; assuming male
-z INT Z-drop [mapper default]
-m use minimap2 for mapping (default)
-u use unimap for mapping
-W FILE repetitive k-mer list; use winnowmap for mapping
") if @ARGV < 4;
# test file existence
my $pre = shift(@ARGV);
my $ref = shift(@ARGV);
my @hap = @ARGV;
die "ERROR: failed to read the reference file '$ref'\n" unless -f $ref;
die "ERROR: please index the reference with 'samtools faidx'\n" unless -f "$ref.fai";
die "ERROR: failed to read the 1st haplotype '$hap[0]'\n" unless -f $hap[0];
die "ERROR: failed to read the 2nd haplotype '$hap[1]'\n" unless -f $hap[1];
my $is_male = defined($opts{x})? 1 : 0;
if (defined $opts{x}) {
die "ERROR: failed to read PAR\n" unless -f $opts{x};
$is_male = 1;
}
# find the root directory
my $exepath = $0 =~/^\S+\/[^\/\s]+/? $0 : &which($0);
my $root = $0 =~/^(\S+)\/[^\/\s]+/? $1 : undef;
$root = $exepath =~/^(\S+)\/[^\/\s]+/? $1 : undef if !defined($root);
die "ERROR: failed to locate the root directory\n" if !defined($root);
# mapper settings
my $mapper = "minimap2";
my $mm2_opt = q/-xasm5 --cs -t$(N_THREADS)/;
if (defined $opts{u}) {
$mapper = "unimap";
} elsif (defined $opts{W}) {
$mapper = "winnowmap";
$mm2_opt .= " -r2k -W $opts{W}";
}
$mapper = "minimap2" if defined($opts{m});
$mm2_opt .= " -z$opts{z},200" if defined($opts{z});
my $mm2_idx = defined($opts{d})? $opts{d} : $ref;
# write Makefile
my @mak = ();
push(@mak, "ROOT=$root");
push(@mak, "N_THREADS=$opts{t}");
push(@mak, "MAPPER=$mapper");
push(@mak, "MM2_IDX=$mm2_idx");
push(@mak, "MM2_OPT=$mm2_opt");
push(@mak, "REF_FA=$ref");
push(@mak, "");
push(@mak, "all:$pre.dip.bed $pre.dip.vcf.gz", "");
push(@mak, "$pre.hap1.paf.gz:$hap[0]");
push(@mak, q{ $(ROOT)/$(MAPPER) -c --paf-no-hit $(MM2_OPT) $(MM2_IDX) $< 2> $@.log | gzip > $@});
push(@mak, "$pre.hap2.paf.gz:$hap[1]");
push(@mak, q{ $(ROOT)/$(MAPPER) -c --paf-no-hit $(MM2_OPT) $(MM2_IDX) $< 2> $@.log | gzip > $@});
push(@mak, "");
push(@mak, "$pre.hap1.sam.gz:$hap[0]");
push(@mak, q{ $(ROOT)/$(MAPPER) -a $(MM2_OPT) $(MM2_IDX) $< 2> $@.log | gzip > $@});
push(@mak, "$pre.hap2.sam.gz:$hap[1]");
push(@mak, q{ $(ROOT)/$(MAPPER) -a $(MM2_OPT) $(MM2_IDX) $< 2> $@.log | gzip > $@});
push(@mak, "");
push(@mak, "$pre.hap1.bam:$pre.hap1.sam.gz");
push(@mak, q{ $(ROOT)/k8 $(ROOT)/dipcall-aux.js samflt $< | $(ROOT)/samtools sort -m4G --threads 4 -o $@ -});
push(@mak, "$pre.hap2.bam:$pre.hap2.sam.gz");
push(@mak, q{ $(ROOT)/k8 $(ROOT)/dipcall-aux.js samflt $< | $(ROOT)/samtools sort -m4G --threads 4 -o $@ -});
push(@mak, "");
push(@mak, "$pre.pair.vcf.gz:$pre.hap1.bam $pre.hap2.bam");
push(@mak, q{ $(ROOT)/htsbox pileup -q5 -evcf $(REF_FA) $^ | $(ROOT)/htsbox bgzip > $@});
push(@mak, "$pre.dip.vcf.gz:$pre.pair.vcf.gz");
my $pair_opt = defined($opts{a})? "-a" : "";
if ($is_male) {
push(@mak, q{ $(ROOT)/k8 $(ROOT)/dipcall-aux.js vcfpair} . qq{ $pair_opt -p $opts{x} } . q{$< | $(ROOT)/htsbox bgzip > $@});
} else {
push(@mak, q{ $(ROOT)/k8 $(ROOT)/dipcall-aux.js vcfpair} . qq{ $pair_opt } . q{$< | $(ROOT)/htsbox bgzip > $@});
}
push(@mak, "");
push(@mak, "$pre.hap1.var.gz:$pre.hap1.paf.gz");
push(@mak, q{ gzip -dc $< | sort -k6,6 -k8,8n | $(ROOT)/k8 $(ROOT)/paftools.js call - 2> $@.vst | gzip > $@});
push(@mak, "$pre.hap2.var.gz:$pre.hap2.paf.gz");
push(@mak, q{ gzip -dc $< | sort -k6,6 -k8,8n | $(ROOT)/k8 $(ROOT)/paftools.js call - 2> $@.vst | gzip > $@});
push(@mak, "$pre.hap1.bed:$pre.hap1.var.gz");
push(@mak, q{ gzip -dc $< | grep ^R | cut -f2- > $@});
push(@mak, "$pre.hap2.bed:$pre.hap2.var.gz");
push(@mak, q{ gzip -dc $< | grep ^R | cut -f2- > $@});
push(@mak, "$pre.dip.bed:$pre.hap1.bed $pre.hap2.bed");
if ($is_male) {
my $bedtk = '$(ROOT)/bedtk';
my @cmd;
push(@cmd, "$bedtk isec $pre.hap1.bed $pre.hap2.bed | egrep -v '^(chr)?[XY]' > $pre.tmp.bed"); # autosome
push(@cmd, "$bedtk isec $pre.hap1.bed $pre.hap2.bed | $bedtk isec $opts{x} >> $pre.tmp.bed"); # chrX PAR
push(@cmd, "$bedtk sub $pre.hap2.bed $pre.hap1.bed | egrep '^(chr)?X' | $bedtk sub - $opts{x} >> $pre.tmp.bed"); # chrX non-PAR
push(@cmd, "$bedtk sub $pre.hap1.bed $pre.hap2.bed | egrep '^(chr)?Y' >> $pre.tmp.bed"); # chrY non-PAR
push(@cmd, "$bedtk sort $pre.tmp.bed > " . '$@');
push(@mak, "\t" . join("; ", @cmd));
} else {
push(@mak, q{ $(ROOT)/bedtk isec -m $^ > $@});
}
push(@mak, "");
print(join("\n", @mak));
exit(0);
# auxiliary routines
sub which {
my $file = shift;
my $path = (@_)? shift : $ENV{PATH};
return if (!defined($path));
foreach my $x (split(":", $path)) {
$x =~ s/\/$//;
return "$x/$file" if (-x "$x/$file");
}
return;
}