-
Notifications
You must be signed in to change notification settings - Fork 21
/
JAFFAL.groovy
executable file
·92 lines (81 loc) · 2.96 KB
/
JAFFAL.groovy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
/***********************************************************
** This is the JAFFA pipeline file for fusion detection
** with noisy long read data. For polished long read data,
** use JAFFA_direct.groovy. Run like so:
** bpipe run <path_to_this_file> <path_to_fastq/fasta_files>
** See our website for details on running options:
** https://github.com/Oshlack/JAFFA/wiki.
**
** Author: Nadia Davidson <nadia.davidson@petermac.org>
** Last Update: 2021
*********************************************************/
// Absolute path of the directory that contains this pipeline script.
codeBase = file(bpipe.Config.config.script).parentFile.absolutePath
// Load JAFFA_stages.groovy from that same directory.
// NOTE(review): names used below but not defined in this file (e.g. run_check,
// filter_transcripts, fastaSuffix, jaffa_output) presumably come from it — confirm.
load codeBase+"/JAFFA_stages.groovy"
// Stage: run the external $reformat tool on the stage input and write the
// result to <branch>.fasta in the directory jaffa_output+branch.
// NOTE(review): reformat, threads and jaffa_output are not defined in the
// visible part of this file; presumably they are set in JAFFA_stages.groovy.
get_fasta = {
doc "Converting fastqs to fasta"
output.dir=jaffa_output+branch
produce(branch+".fasta"){
// ignorebadquality=t and threads=$threads are passed straight through to $reformat.
exec "$reformat ignorebadquality=t in=$input out=$output threads=$threads ;"
}
}
// Stage: run minimap2 (preset "map-ont", with -c) using $transFasta as the
// reference and the stage input as the query, redirecting stdout to
// <branch>.paf in the directory jaffa_output+branch.
// NOTE(review): minimap2, threads and transFasta are defined outside the
// visible part of this file — confirm where they are set.
minimap2_transcriptome = {
doc "Aligning candidates to transcriptome using minimap2"
output.dir=jaffa_output+branch
produce(branch+".paf"){
exec """
$minimap2 -t $threads -x map-ont -c $transFasta $input > $output1 ;
"""
}
}
/** CODE NOT USED infer_genome_alignment = {
doc "Bypassing genomic alignment and inferring genome position from transcriptome alignments"
output.dir=jaffa_output+branch
produce(branch+"_genome.psl"){
exec """
$bypass_genomic_alignment $transTable $input.txt > $output
"""
}
}**/
// Stage: run minimap2 (preset "splice", with -c) using $genomeFasta as the
// reference, then derive a second, PSL-named file from the resulting PAF.
// Outputs (both in the directory jaffa_output+branch):
//   $output1 = <branch>_genome.paf : minimap2 stdout
//   $output2 = <branch>_genome.psl : one tab-separated line per selected PAF line
// The command runs two grep|awk passes over $output1:
//   1. lines containing <tab>+<tab> are reformatted and written to $output2 (>)
//   2. lines containing <tab>-<tab> are reformatted and appended to $output2 (>>)
// so every "+" line precedes every "-" line in $output2.
// Both awk programs print the same fields except for the second-to-last item:
// "+" lines emit $3,$3+100, whereas "-" lines emit $2-$4,$2-$4+100,
// NOTE(review): presumably the <tab>+/-<tab> match is the PAF strand column and
// the constant 2 plus the 100s describe a synthetic two-block PSL record —
// confirm against the PAF/PSL specifications and the downstream consumer.
// Escaping: \$ keeps a literal $ for the shell/awk instead of Groovy
// interpolation, and \\t reaches the shell as \t.
minimap2_genome = {
doc "Aligning candidates to genome using minimap2"
output.dir=jaffa_output+branch
produce(branch+"_genome.paf",branch+"_genome.psl"){
exec """
$minimap2 -t $threads -x splice -c $genomeFasta $input > $output1;
grep \$'\\t+\\t' $output1 | awk -F'\\t' -v OFS="\\t" '{ print \$4-\$3,0,0,0,0,0,0,0,\$5,\$1,\$2,\$3,\$4,\$6,\$7,\$8,\$9,2, 100","\$4-\$3-100",",\$3","\$3+100",", \$8","\$9-\$4+\$3+100"," }' > $output2 ;
grep \$'\\t-\\t' $output1 | awk -F'\\t' -v OFS="\\t" '{ print \$4-\$3,0,0,0,0,0,0,0,\$5,\$1,\$2,\$3,\$4,\$6,\$7,\$8,\$9,2, 100","\$4-\$3-100",", \$2-\$4","\$2-\$4+100",", \$8","\$9-\$4+\$3+100"," }' >> $output2 ;
"""
}
}
// Stage: run $make_3_gene_fusion_table on the .summary and .txt stage inputs.
// Outputs (both in the directory jaffa_output+branch):
//   $output1 = <branch>.3gene_summary : receives the command's stdout
//   $output2 = <branch>.3gene_reads   : passed to the command as its third argument
// NOTE(review): make_3_gene_fusion_table is defined outside the visible part of
// this file; presumably it writes $output2 itself — confirm.
report_3_gene_fusions = {
doc "Checking for reads that support multi-fusion transcripts"
output.dir=jaffa_output+branch
produce(branch+".3gene_summary",branch+".3gene_reads"){
exec """
$make_3_gene_fusion_table $input.summary $input.txt $output2 > $output1
"""
}
}
// Settings assigned before the pipeline is run.
// NOTE(review): reassign_dist and readLayout are not referenced in the visible
// part of this file; presumably they are read by stages in JAFFA_stages.groovy.
reassign_dist=50
readLayout="single"
// Input pattern applied to the stage list when the first argument does not end
// in fastaSuffix (see the run block at the end of this file).
fastqInputFormat="%.gz"
// Stage sequence shared by both entry points at the end of this file (fasta
// input, and fastq.gz input after get_fasta). Stages run in the order listed.
// NOTE(review): filter_transcripts, extract_fusion_sequences,
// make_fasta_reads_table and get_final_list are not defined in the visible part
// of this file; presumably they come from JAFFA_stages.groovy.
common_steps = segment {
minimap2_transcriptome +
filter_transcripts +
extract_fusion_sequences +
// infer_genome_alignment + (disabled: the stage itself is commented out above)
minimap2_genome +
make_fasta_reads_table +
get_final_list +
report_3_gene_fusions
}
// Choose the entry point from the first command-line argument: an input that
// already ends in fastaSuffix goes straight into common_steps, anything else
// (fastq.gz) is converted to fasta by get_fasta first.
def startsFromFasta = args[0].endsWith(fastaSuffix)
if (startsFromFasta) {
    // fasta input: no conversion stage
    run {
        run_check +
        fastaInputFormat * [ common_steps ] +
        compile_all_results
    }
} else {
    // fastq.gz input: convert, then run the shared steps
    run {
        run_check +
        fastqInputFormat * [ get_fasta + common_steps ] +
        compile_all_results
    }
}