# Minimap2 splicing junction bed ```bash %%bash minimap2 --version ``` 2.17-r941 ```bash %%bash sed -e 's/gene_id/gene_name/' \ /cluster/ggs_lab/mtparker/Arabidopsis_annotations/Araport/v11/201606/Araport11_GFF3_genes_transposons.201606.no_chr.gtf \ > annot.gtf head annot.gtf ``` 1 Araport11 5UTR 3631 3759 . + . transcript_id "AT1G01010.1"; gene_name "AT1G01010"; 1 Araport11 exon 3631 3913 . + . transcript_id "AT1G01010.1"; gene_name "AT1G01010"; 1 Araport11 start_codon 3760 3762 . + . transcript_id "AT1G01010.1"; gene_name "AT1G01010"; 1 Araport11 CDS 3760 3913 . + 0 transcript_id "AT1G01010.1"; gene_name "AT1G01010"; 1 Araport11 exon 3996 4276 . + . transcript_id "AT1G01010.1"; gene_name "AT1G01010"; 1 Araport11 CDS 3996 4276 . + 2 transcript_id "AT1G01010.1"; gene_name "AT1G01010"; 1 Araport11 exon 4486 4605 . + . transcript_id "AT1G01010.1"; gene_name "AT1G01010"; 1 Araport11 CDS 4486 4605 . + 0 transcript_id "AT1G01010.1"; gene_name "AT1G01010"; 1 Araport11 exon 4706 5095 . + . transcript_id "AT1G01010.1"; gene_name "AT1G01010"; 1 Araport11 CDS 4706 5095 . 
+ 0 transcript_id "AT1G01010.1"; gene_name "AT1G01010"; ```bash %%bash paftools.js gff2bed -j \ annot.gtf \ > juncs.bed head juncs.bed ``` 1 3913 3995 AT1G01010.1||AT1G01010 1000 + 1 4276 4485 AT1G01010.1||AT1G01010 1000 + 1 4605 4705 AT1G01010.1||AT1G01010 1000 + 1 5095 5173 AT1G01010.1||AT1G01010 1000 + 1 5326 5438 AT1G01010.1||AT1G01010 1000 + 1 7069 7156 AT1G01020.2||AT1G01020 1000 - 1 7450 7563 AT1G01020.2||AT1G01020 1000 - 1 7649 7761 AT1G01020.2||AT1G01020 1000 - 1 7835 7941 AT1G01020.2||AT1G01020 1000 - 1 7987 8235 AT1G01020.2||AT1G01020 1000 - ### no junc-bonus options, no splice preset ```bash %%bash # apart from the junc-bed related settings, these settings should be identical to -x splice preset minimap2 -a -t12 -k15 -w5 --splice \ -g2000 -G200k -A1 -B2 -O2,32 -E1,0 \ -C9 -z200 -ub --splice-flank=yes \ --cs=short \ ref.fa test.fastq > aln.sam ``` [M::mm_idx_gen::5.654*1.26] collected minimizers [M::mm_idx_gen::6.432*2.16] sorted minimizers [M::main::6.432*2.16] loaded/built the index for 7 target sequence(s) [M::mm_mapopt_update::7.156*2.04] mid_occ = 60 [M::mm_idx_stat] kmer size: 15; skip: 5; is_hpc: 0; #seq: 7 [M::mm_idx_stat::7.605*1.98] distinct minimizers: 28111714 (78.50% are singletons); average occurrences: 1.452; average spacing: 2.932 [M::worker_pipeline::95.367*10.39] mapped 721392 sequences [M::worker_pipeline::167.234*11.07] mapped 722436 sequences [M::worker_pipeline::180.310*10.96] mapped 203656 sequences [M::main] Version: 2.17-r941 [M::main] CMD: minimap2 -a -t12 -k15 -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --splice-flank=yes --cs=short ref.fa test.fastq [M::main] Real time: 180.503 sec; CPU: 1976.893 sec; Peak RSS: 10.905 GB ```bash %%bash paftools.js junceval annot.gtf aln.sam ``` # unmapped reads: 668070 # mapped reads: 979414 # primary alignments: 984638 # singletons: 246055 # predicted introns: 2896215 # non-overlapping introns: 4851 # correct introns: 2742705 (94.70%) ### no junc-bonus options, splice preset 
```bash %%bash minimap2 -x splice -a -t12 -k15 -w5 --splice \ -g2000 -G200k -A1 -B2 -O2,32 -E1,0 \ -C9 -z200 -ub --splice-flank=yes \ --cs=short \ ref.fa test.fastq > aln_splice.sam ``` [M::mm_idx_gen::5.226*1.27] collected minimizers [M::mm_idx_gen::5.883*2.14] sorted minimizers [M::main::5.884*2.14] loaded/built the index for 7 target sequence(s) [M::mm_mapopt_update::6.604*2.01] mid_occ = 60 [M::mm_idx_stat] kmer size: 15; skip: 5; is_hpc: 0; #seq: 7 [M::mm_idx_stat::7.028*1.95] distinct minimizers: 28111714 (78.50% are singletons); average occurrences: 1.452; average spacing: 2.932 [M::worker_pipeline::92.832*10.50] mapped 721392 sequences [M::worker_pipeline::164.342*11.13] mapped 722436 sequences [M::worker_pipeline::177.539*11.00] mapped 203656 sequences [M::main] Version: 2.17-r941 [M::main] CMD: minimap2 -x splice -a -t12 -k15 -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --splice-flank=yes --cs=short ref.fa test.fastq [M::main] Real time: 177.904 sec; CPU: 1953.891 sec; Peak RSS: 10.777 GB ```bash %%bash paftools.js junceval annot.gtf aln_splice.sam ``` # unmapped reads: 668070 # mapped reads: 979414 # primary alignments: 984638 # singletons: 246055 # predicted introns: 2896215 # non-overlapping introns: 4851 # correct introns: 2742705 (94.70%) ### junc-bonus=0, no splice preset ```bash %%bash minimap2 -a -t12 -k15 -w5 --splice \ -g2000 -G200k -A1 -B2 -O2,32 -E1,0 \ -C9 -z200 -ub --splice-flank=yes \ --cs=short --junc-bonus=0 --junc-bed juncs.bed \ ref.fa test.fastq > aln_bonus_0.sam ``` [M::mm_idx_gen::5.424*1.24] collected minimizers [M::mm_idx_gen::6.241*2.20] sorted minimizers [M::main::6.241*2.20] loaded/built the index for 7 target sequence(s) [M::mm_mapopt_update::6.940*2.08] mid_occ = 60 [M::mm_idx_stat] kmer size: 15; skip: 5; is_hpc: 0; #seq: 7 [M::mm_idx_stat::7.337*2.02] distinct minimizers: 28111714 (78.50% are singletons); average occurrences: 1.452; average spacing: 2.932 [M::worker_pipeline::108.405*10.28] mapped 721392 
sequences [M::worker_pipeline::190.435*11.00] mapped 722436 sequences [M::worker_pipeline::206.903*10.93] mapped 203656 sequences [M::main] Version: 2.17-r941 [M::main] CMD: minimap2 -a -t12 -k15 -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --splice-flank=yes --cs=short --junc-bonus=0 --junc-bed juncs.bed ref.fa test.fastq [M::main] Real time: 207.241 sec; CPU: 2261.330 sec; Peak RSS: 10.919 GB ```bash %%bash paftools.js junceval annot.gtf aln_bonus_0.sam ``` # unmapped reads: 668070 # mapped reads: 979414 # primary alignments: 984638 # singletons: 246055 # predicted introns: 2896215 # non-overlapping introns: 4851 # correct introns: 2742705 (94.70%) ### junc-bonus=4, no splice preset ```bash %%bash minimap2 -a -t12 -k15 -w5 --splice \ -g2000 -G200k -A1 -B2 -O2,32 -E1,0 \ -C9 -z200 -ub --splice-flank=yes \ --cs=short --junc-bonus=4 --junc-bed juncs.bed \ ref.fa test.fastq > aln_bonus_4.sam ``` [M::mm_idx_gen::5.519*1.30] collected minimizers [M::mm_idx_gen::6.281*2.17] sorted minimizers [M::main::6.281*2.17] loaded/built the index for 7 target sequence(s) [M::mm_mapopt_update::6.991*2.05] mid_occ = 60 [M::mm_idx_stat] kmer size: 15; skip: 5; is_hpc: 0; #seq: 7 [M::mm_idx_stat::7.404*1.99] distinct minimizers: 28111714 (78.50% are singletons); average occurrences: 1.452; average spacing: 2.932 [M::worker_pipeline::105.625*10.63] mapped 721392 sequences [M::worker_pipeline::187.618*11.21] mapped 722436 sequences [M::worker_pipeline::203.306*11.12] mapped 203656 sequences [M::main] Version: 2.17-r941 [M::main] CMD: minimap2 -a -t12 -k15 -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --splice-flank=yes --cs=short --junc-bonus=4 --junc-bed juncs.bed ref.fa test.fastq [M::main] Real time: 203.585 sec; CPU: 2260.143 sec; Peak RSS: 10.779 GB ```bash %%bash paftools.js junceval annot.gtf aln_bonus_4.sam ``` # unmapped reads: 668070 # mapped reads: 979414 # primary alignments: 984638 # singletons: 246055 # predicted introns: 2896215 # 
non-overlapping introns: 4851 # correct introns: 2742705 (94.70%) ### junc-bonus=9, no splice preset ```bash %%bash minimap2 -a -t12 -k15 -w5 --splice \ -g2000 -G200k -A1 -B2 -O2,32 -E1,0 \ -C9 -z200 -ub --splice-flank=yes \ --cs=short --junc-bonus=9 --junc-bed juncs.bed \ ref.fa test.fastq > aln_bonus_9.sam ``` [M::mm_idx_gen::6.303*1.10] collected minimizers [M::mm_idx_gen::6.946*1.84] sorted minimizers [M::main::6.946*1.84] loaded/built the index for 7 target sequence(s) [M::mm_mapopt_update::7.652*1.76] mid_occ = 60 [M::mm_idx_stat] kmer size: 15; skip: 5; is_hpc: 0; #seq: 7 [M::mm_idx_stat::8.057*1.72] distinct minimizers: 28111714 (78.50% are singletons); average occurrences: 1.452; average spacing: 2.932 [M::worker_pipeline::104.377*10.52] mapped 721392 sequences [M::worker_pipeline::187.224*11.16] mapped 722436 sequences [M::worker_pipeline::203.006*11.07] mapped 203656 sequences [M::main] Version: 2.17-r941 [M::main] CMD: minimap2 -a -t12 -k15 -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --splice-flank=yes --cs=short --junc-bonus=9 --junc-bed juncs.bed ref.fa test.fastq [M::main] Real time: 203.323 sec; CPU: 2247.157 sec; Peak RSS: 10.644 GB ```bash %%bash paftools.js junceval annot.gtf aln_bonus_9.sam ``` # unmapped reads: 668070 # mapped reads: 979414 # primary alignments: 984638 # singletons: 246055 # predicted introns: 2896215 # non-overlapping introns: 4851 # correct introns: 2742705 (94.70%) ### junc-bonus=0, splice preset ```bash %%bash minimap2 -x splice -a -t12 -k15 -w5 --splice \ -g2000 -G200k -A1 -B2 -O2,32 -E1,0 \ -C9 -z200 -ub --splice-flank=yes \ --cs=short --junc-bonus=0 --junc-bed juncs.bed \ ref.fa test.fastq > aln_bonus_0_splice.sam ``` [M::mm_idx_gen::6.759*1.06] collected minimizers [M::mm_idx_gen::7.423*1.78] sorted minimizers [M::main::7.423*1.78] loaded/built the index for 7 target sequence(s) [M::mm_mapopt_update::8.131*1.71] mid_occ = 60 [M::mm_idx_stat] kmer size: 15; skip: 5; is_hpc: 0; #seq: 7 
[M::mm_idx_stat::8.544*1.68] distinct minimizers: 28111714 (78.50% are singletons); average occurrences: 1.452; average spacing: 2.932 [M::worker_pipeline::105.400*10.47] mapped 721392 sequences [M::worker_pipeline::187.942*11.12] mapped 722436 sequences [M::worker_pipeline::204.019*11.03] mapped 203656 sequences [M::main] Version: 2.17-r941 [M::main] CMD: minimap2 -x splice -a -t12 -k15 -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --splice-flank=yes --cs=short --junc-bonus=0 --junc-bed juncs.bed ref.fa test.fastq [M::main] Real time: 204.327 sec; CPU: 2249.863 sec; Peak RSS: 10.611 GB ```bash %%bash paftools.js junceval annot.gtf aln_bonus_0_splice.sam ``` # unmapped reads: 666538 # mapped reads: 980946 # primary alignments: 986187 # singletons: 244051 # predicted introns: 2956054 # non-overlapping introns: 5016 # correct introns: 2932327 (99.20%) ### junc-bonus=4, splice preset ```bash %%bash minimap2 -x splice -a -t12 -k15 -w5 --splice \ -g2000 -G200k -A1 -B2 -O2,32 -E1,0 \ -C9 -z200 -ub --splice-flank=yes \ --cs=short --junc-bonus=4 --junc-bed juncs.bed \ ref.fa test.fastq > aln_bonus_4_splice.sam ``` [M::mm_idx_gen::5.984*1.23] collected minimizers [M::mm_idx_gen::6.761*2.08] sorted minimizers [M::main::6.761*2.08] loaded/built the index for 7 target sequence(s) [M::mm_mapopt_update::7.485*1.97] mid_occ = 60 [M::mm_idx_stat] kmer size: 15; skip: 5; is_hpc: 0; #seq: 7 [M::mm_idx_stat::7.914*1.92] distinct minimizers: 28111714 (78.50% are singletons); average occurrences: 1.452; average spacing: 2.932 [M::worker_pipeline::106.669*10.33] mapped 721392 sequences [M::worker_pipeline::188.281*11.05] mapped 722436 sequences [M::worker_pipeline::205.050*10.94] mapped 203656 sequences [M::main] Version: 2.17-r941 [M::main] CMD: minimap2 -x splice -a -t12 -k15 -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --splice-flank=yes --cs=short --junc-bonus=4 --junc-bed juncs.bed ref.fa test.fastq [M::main] Real time: 205.275 sec; CPU: 2242.655 
sec; Peak RSS: 10.877 GB ```bash %%bash paftools.js junceval annot.gtf aln_bonus_4_splice.sam ``` # unmapped reads: 666538 # mapped reads: 980946 # primary alignments: 986187 # singletons: 244051 # predicted introns: 2956054 # non-overlapping introns: 5016 # correct introns: 2932327 (99.20%) ### junc-bonus=9, splice preset ```bash %%bash minimap2 -x splice -a -t12 -k15 -w5 --splice \ -g2000 -G200k -A1 -B2 -O2,32 -E1,0 \ -C9 -z200 -ub --splice-flank=yes \ --cs=short --junc-bonus=9 --junc-bed juncs.bed \ ref.fa test.fastq > aln_bonus_9_splice.sam ``` [M::mm_idx_gen::5.269*1.26] collected minimizers [M::mm_idx_gen::6.071*2.23] sorted minimizers [M::main::6.072*2.23] loaded/built the index for 7 target sequence(s) [M::mm_mapopt_update::6.774*2.10] mid_occ = 60 [M::mm_idx_stat] kmer size: 15; skip: 5; is_hpc: 0; #seq: 7 [M::mm_idx_stat::7.168*2.04] distinct minimizers: 28111714 (78.50% are singletons); average occurrences: 1.452; average spacing: 2.932 [M::worker_pipeline::104.544*10.56] mapped 721392 sequences [M::worker_pipeline::187.842*11.20] mapped 722436 sequences [M::worker_pipeline::203.053*11.07] mapped 203656 sequences [M::main] Version: 2.17-r941 [M::main] CMD: minimap2 -x splice -a -t12 -k15 -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --splice-flank=yes --cs=short --junc-bonus=9 --junc-bed juncs.bed ref.fa test.fastq [M::main] Real time: 203.351 sec; CPU: 2248.942 sec; Peak RSS: 11.043 GB ```bash %%bash paftools.js junceval annot.gtf aln_bonus_9_splice.sam ``` # unmapped reads: 666538 # mapped reads: 980946 # primary alignments: 986187 # singletons: 244051 # predicted introns: 2956054 # non-overlapping introns: 5016 # correct introns: 2932327 (99.20%)