-
Notifications
You must be signed in to change notification settings - Fork 0
/
build_sharded_index.wdl
75 lines (60 loc) · 1.45 KB
/
build_sharded_index.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
version 1.1
workflow build_sharded_index {
    # Splits db_fasta into shards and then builds a minimap2 index for each shard.
    #
    # Inputs:
    #   db_fasta          FASTA file to be sharded and indexed
    #   shards            number of parts to split db_fasta into
    #   minimap2_options  optional extra minimap2 arguments (e.g. '-x sr')
    #   docker            container image used by both tasks
    # Output:
    #   index_shards      one index file per shard
    #
    # Warning: the same minimap2_options given here must also be used for
    # mapping & merging.
    input {
        File db_fasta
        Int shards
        String? minimap2_options
        String docker
    }

    # Step 1: cut the FASTA into pieces.
    call split_db_fasta as split {
        input:
            db_fasta = db_fasta,
            shards = shards,
            docker = docker
    }

    # Step 2: index each piece independently.
    scatter (fasta_part in split.db_shards_fasta) {
        call index_shard as index {
            input:
                shard_fasta = fasta_part,
                minimap2_options = minimap2_options,
                docker = docker
        }
    }

    output {
        # Gathered from the scatter: one entry per shard FASTA.
        Array[File] index_shards = index.shard
    }
}
task split_db_fasta {
    # Splits db_fasta into `shards` parts with `seqkit split2`, writing the
    # parts into the `split` directory, and returns every file found there.
    input {
        File db_fasta
        Int shards
        String docker
        Int cpu = 4
    }

    command <<<
        set -euxo pipefail
        seqkit split2 '~{db_fasta}' \
            -p ~{shards} \
            -j ~{cpu} \
            -O split
    >>>

    output {
        # Everything seqkit wrote under split/ is treated as a shard.
        Array[File] db_shards_fasta = glob("split/*")
    }

    runtime {
        # The same cpu value is passed to seqkit via -j above.
        cpu: cpu
        docker: docker
    }
}
task index_shard {
    # Builds a minimap2 index for one shard FASTA. The index file is named
    # after the shard's basename with an ".idx" suffix appended.
    input {
        File shard_fasta
        String minimap2_options = ""
        String docker
    }

    # Output file name, derived from the shard's file name.
    String index_filename = basename(shard_fasta) + ".idx"

    command <<<
        set -euxo pipefail
        # set -I to NOT further split the shard
        minimap2 ~{minimap2_options} \
            -I 9999G \
            -d '~{index_filename}' \
            '~{shard_fasta}'
    >>>

    output {
        File shard = index_filename
    }

    runtime {
        docker: docker
    }
}