Skip to content

Commit

Permalink
Merge branch 'master' into featurecounts_strand
Browse files Browse the repository at this point in the history
  • Loading branch information
johanneskoester authored Apr 25, 2022
2 parents 4a98e61 + b9e25ae commit 262db59
Show file tree
Hide file tree
Showing 26 changed files with 568 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ jobs:

- name: Setup Snakemake environment
run: |
# ensure that mamba is happy to write into the cache
sudo chown -R runner:docker /usr/share/miniconda/pkgs/cache
conda install -c conda-forge mamba --quiet
export PATH="/usr/share/miniconda/bin:$PATH"
mamba create -c bioconda -c conda-forge --quiet -y --name snakemake snakemake-minimal pytest
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/qc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,11 @@ jobs:

- name: Setup Snakemake environment
run: |
conda install -c conda-forge mamba --quiet
export PATH="/usr/share/miniconda/bin:$PATH"
mamba create -c bioconda -c conda-forge --quiet -y --name snakemake snakemake-minimal
# ensure that mamba is happy to write into the cache
sudo chown -R runner:docker /usr/share/miniconda/pkgs/cache
conda install -c conda-forge mamba --quiet
export PATH="/usr/share/miniconda/bin:$PATH"
mamba create -c bioconda -c conda-forge --quiet -y --name snakemake snakemake-minimal
- name: Fetch master
run: |
Expand Down
9 changes: 9 additions & 0 deletions bio/dragmap/align/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Conda environment for the dragmap/align wrapper.
# NOTE(review): bioconda is listed ahead of conda-forge here; bioconda's own
# documentation recommends giving conda-forge the higher priority — confirm
# that this order is intentional.
channels:
- bioconda
- conda-forge
- defaults
# Tool versions are pinned to a major.minor release.
# samtools and picard are presumably needed for the optional sorting modes
# ('samtools' / 'picard') exposed through the `sorting` param — TODO confirm
# against the wrapper script.
dependencies:
- dragmap =1.2
- samtools =1.14
- picard =2.26
- snakemake-wrapper-utils =0.3
13 changes: 13 additions & 0 deletions bio/dragmap/align/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Wrapper metadata for bio/dragmap/align.
name: Dragmap
description: |
  Map reads with Dragmap.
url: https://github.com/Illumina/DRAGMAP
authors:
  - Filipe G. Vieira
input:
  - FASTQ file(s)
  - reference hash table
output:
  - SAM/BAM/CRAM file
# Every param used by the test Snakefiles is listed here so that users do not
# have to read the tests to discover the sorting options.
notes: |
  * The `extra` param allows for additional program arguments.
  * The `sorting` param selects how the output is sorted; it can be 'none', 'samtools' or 'picard'.
  * The `sort_order` param can be 'queryname' or 'coordinate'.
  * The `sort_extra` param allows for extra arguments for samtools/picard.
16 changes: 16 additions & 0 deletions bio/dragmap/align/test/Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Test case for the dragmap/align wrapper with `sorting` set to 'none':
# two FASTQ read files plus the "genome" index directory in, one BAM out.
rule dragmap_align:
    input:
        reads=[
            "reads/{sample}.1.fastq",
            "reads/{sample}.2.fastq",
        ],
        idx="genome",
    output:
        "mapped/{sample}.bam",
    log:
        "logs/dragmap/{sample}.align.log",
    params:
        extra="",
        # Sorting backend: one of 'none', 'samtools' or 'picard'.
        sorting="none",
        # Requested order: either 'queryname' or 'coordinate'.
        sort_order="queryname",
        # Additional arguments passed on to samtools/picard.
        sort_extra="",
    threads: 8
    wrapper:
        "master/bio/dragmap/align"
16 changes: 16 additions & 0 deletions bio/dragmap/align/test/Snakefile_picard
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Test case for the dragmap/align wrapper with `sorting` set to 'picard':
# two FASTQ read files plus the "genome" index directory in, one BAM out.
rule dragmap_align:
    input:
        reads=[
            "reads/{sample}.1.fastq",
            "reads/{sample}.2.fastq",
        ],
        idx="genome",
    output:
        "mapped/{sample}.bam",
    log:
        "logs/dragmap/{sample}.align.log",
    params:
        extra="",
        # Sorting backend: one of 'none', 'samtools' or 'picard'.
        sorting="picard",
        # Requested order: either 'queryname' or 'coordinate'.
        sort_order="queryname",
        # Additional arguments passed on to samtools/picard.
        sort_extra="",
    threads: 8
    wrapper:
        "master/bio/dragmap/align"
35 changes: 35 additions & 0 deletions bio/dragmap/align/test/Snakefile_samtools
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Test case for the dragmap/align wrapper with `sorting` set to 'samtools':
# two FASTQ read files plus the "genome" index directory in, one BAM out.
rule dragmap_align:
    input:
        reads=[
            "reads/{sample}.1.fastq",
            "reads/{sample}.2.fastq",
        ],
        idx="genome",
    output:
        "mapped/{sample}.bam",
    log:
        "logs/dragmap/{sample}.align.log",
    params:
        extra="",
        # Sorting backend: one of 'none', 'samtools' or 'picard'.
        sorting="samtools",
        # Requested order: either 'queryname' or 'coordinate'.
        sort_order="queryname",
        # Additional arguments passed on to samtools/picard.
        sort_extra="",
    threads: 8
    wrapper:
        "master/bio/dragmap/align"


# Test case for the dragmap/align wrapper with samtools coordinate sorting and
# `--write-index` passed through `sort_extra`; the rule declares both the BAM
# and its .csi index as outputs.
rule dragmap_align_write_index:
    input:
        reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
        idx="genome",
    output:
        "mapped_with_index/{sample}.bam",
        "mapped_with_index/{sample}.bam.csi",
    log:
        # Distinct from the log of `dragmap_align` in this Snakefile, so the
        # two rules cannot overwrite each other's log for the same sample.
        "logs/dragmap/{sample}.align_write_index.log",
    params:
        extra="",
        sorting="samtools",  # Can be 'none', 'samtools' or 'picard'.
        sort_order="coordinate",  # Can be 'queryname' or 'coordinate'.
        sort_extra="--write-index",  # Extra args for samtools/picard.
    threads: 8
    wrapper:
        "master/bio/dragmap/align"
59 changes: 59 additions & 0 deletions bio/dragmap/align/test/genome/hash_table.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Automatically generated by build_hash_table (local version).
# Command line: dragen-os --ht-num-threads 2 --build-hash-table true --ht-reference genome.fasta --output-directory genome
# Hash table version 8
#
# Do not modify.

reference_source = 'genome.fasta'
alt_liftover = ''
reference_name = '/home/fgvieira/data/appz/snakemake-wrappers/bio/dragmap/build/test/genome/reference.bin'
reference_index = '/home/fgvieira/data/appz/snakemake-wrappers/bio/dragmap/build/test/genome/ref_index.bin'
reference_sequences = 1
reference_len = 328704
reference_len_raw = 20
reference_len_not_n = 20
reference_alt_seed = 164864
reference_alt_start = 164864
hash_table = '/home/fgvieira/data/appz/snakemake-wrappers/bio/dragmap/build/test/genome/hash_table.bin'
extend_table = '/home/fgvieira/data/appz/snakemake-wrappers/bio/dragmap/build/test/genome/extend_table.bin'
hash_table_bytes = 65536
extend_table_records = 0
digest_type = 1
digest = 0xC8FF2865
ref_digest = 0x00000000
ref_index_digest = 0x00000000
hash_digest = 0xC8FF2865
liftover_digest = 0x00000000
extend_table_digest = 0x00000000
pri_seed_bases = 17
max_seed_bases = 145
max_ext_increment = 12
ref_seed_interval = 1
table_addr_bits = 16
table_size_64ths = 64
max_seed_freq = 16
pri_max_seed_freq = 16
max_seed_freq_len = 98
target_seed_freq = 4
min_freq_to_extend = 17
thinning_freq_cap = 12
max_thinning_factor = 1
pri_crc_bits = 35
sec_crc_bits = 35
seed_len_cost = 1
seed_freq_cost = 0.5
extension_cost = 0
ext_step_cost = 0.7
ext_rec_cost = 4
repair_strategy = 0
min_repair_prob = 0.2
anchor_bin_bits = 0
hi_freq_rand_hit = 1
ext_rand_hit_freq = 8
pri_crc_poly = 666D451CD
sec_crc_poly = 666D451CD
reference_sequence0 = 'Sheila'
reference_start0 = 163840
reference_beg_trim0 = 0
reference_end_trim0 = 0
reference_len0 = 20
Binary file not shown.
Binary file added bio/dragmap/align/test/genome/hash_table.cmp
Binary file not shown.
190 changes: 190 additions & 0 deletions bio/dragmap/align/test/genome/hash_table_stats.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
Reference sequence:
Original: 20
Encoded: 328704
Masked: 328684 (100.0%)
Unmasked: 20
A bases: 12
C bases: 3
G bases: 3
T bases: 2
GC content: 30.0%
IUPAC-IUB Codes:
0 bases (padding) : 328684
1 base (A,C,G,T) : 20
2 bases (K,M,R,S,W,Y) : 0
3 bases (B,D,H,V) : 0
4 bases (N) : 0

Reference K-mers: (K=17)
Distinct K-mers: 4
K-mer positions: 4
Palindromes: 0
Total K-mer records: 4
Thinned out: 0
Populated seeds: 4
NOTE: All K-mer frequency stats are w.r.t. reference K-mer positions,
and hence a K-mer with frequency N is included N times.
Average K-mer frequency: 1.00
K-mer frequency histogram:
1
4
100%
Log2 K-mer frequency histogram:
0
4
100%

Alt contig K-mer positions: 0
Liftover K-mer matching: 0 ( 0.0%)
Liftover K-mer different: 0 ( 0.0%)
No liftover: 0 ( 0.0%)

Raw primary-seed liftover groups: 0
Average liftover group size: 0.00
Histogram of liftover group sizes:
-
-
-
Histogram of ALT hit count with no liftover:
-
-
-

Liftover groups after possible seed extension: 0
Liftover seed matching: 0 ( 0.0%)
Liftover seed injected: 0 ( 0.0%)
No liftover position: 0 ( 0.0%)
Average liftover group size: 0.00
Histogram of liftover group sizes:
-
-
-
Histogram of ALT hit count with no liftover:
-
-
-

Hash records:
Bytes per record: 8
Number of records: 8192
Hit records: 4 ( 0.0%)
Extension records: 0 ( 0.0%)
Interval records: 0 ( 0.0%)
Chain records: 0 ( 0.0%)
Empty records: 8188 (100.0%)
Raw K-mer occupancy: 0.0%
Final occupancy: 0.0%

Hash buckets:
Records per bucket: 8
Number of buckets: 1024
Histogram of raw K-mer bucket occupancy:
0 1
1020 4
100% 0.39%
Histogram of bucket occupancy after extending or rejecting high frequency seeds:
0 1
1020 4
100% 0.39%
Histogram of physical bucket occupancy as mapped:
0 1
1020 4
100% 0.39%

Seed extensions:
Base seed length: 17
Average extended seed length: 0.0
Average extension increment: 0.0
Average extension steps: 0.00
Extension IDs utilization: 0%
Portion of reference K-mers...
All raw K-mers: 4 (100.0%)
Extended to longer seeds: 0 ( 0.0%)
Remaining as primary hit: 4 (100.0%)
Space in extension table: 0 ( 0.0% of unmasked K-mers)
Average frequencies of reference K-mers...
All raw K-mers: 1.00
Extended to longer seeds: 0.00
Remaining as primary hit: 1.00
As extended seed hit: 0.00
As primary or extended seed: 1.00
Extended seed length histogram:
-
-
-
Seed extension increment histogram:
-
-
-
Seed extension steps histogram:
-
-
-
Pre-extended K-mer frequency histogram:
-
-
-
Remaining primary hit K-mer frequency histogram:
1
4
100%
Post-extended K-mer frequency histogram:
-
-
-

Hash chaining and probing:
Number of chains: 0
Chain buckets: 0
Average length beyond each bucket...
chain: 0.0000
probe: 0.0000
either: 0.0000
Histogram of bucket probe lengths replaced by chaining:
-
-
-
Bucket chain length histogram:
0
1024
100%
Bucket probe length histogram:
0
1024
100%
Chain or probe length histogram:
0
1024
100%

Compression: Records Bits Mean
auto pri hits: 4 12 3.000
auto sec hits: 0 128 128.000
auto nul hits: 328700 657400 2.000
special hits: 0 0 0.000
chain pointers: 0 0 0.000
chain ends: 0 0 0.000
literals: 0 0 0.000
ext literals: 0 0 0.000
TOTAL: 328704 657540 2.000
Misc bits: 5595
Final bits: 663200
Final bytes: 82900

Build thread cycle counts:
cyclesOverhead: 6328198
cyclesBucketOverhead: 0
cyclesExtendPrep: 0
cyclesExtendSort: 0
cyclesLiftover: 0
cyclesPriSort: 0
cyclesExtendFreq: 0
cyclesExtendDynProg: 0
cyclesExtendIntervals: 0
cyclesExtendConstruct: 0
cyclesBucketSort: 57496
cyclesBucketOrganize: 165312
cyclesBucketChain: 8960
cyclesBucketWrite: 592864
cyclesBucketCompress: 53572

Binary file added bio/dragmap/align/test/genome/ref_index.bin
Binary file not shown.
Binary file added bio/dragmap/align/test/genome/reference.bin
Binary file not shown.
Binary file added bio/dragmap/align/test/genome/repeat_mask.bin
Binary file not shown.
Binary file added bio/dragmap/align/test/genome/str_table.bin
Binary file not shown.
4 changes: 4 additions & 0 deletions bio/dragmap/align/test/reads/a.1.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@1
ACGGCAT
+
!!!!!!!
4 changes: 4 additions & 0 deletions bio/dragmap/align/test/reads/a.2.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@1
ACGGCAT
+
!!!!!!!
Loading

0 comments on commit 262db59

Please sign in to comment.