-
Notifications
You must be signed in to change notification settings - Fork 0
/
resistance.wdl
119 lines (98 loc) · 2.88 KB
/
resistance.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# FDA-HFP DSDI resistance WDL Library
# Author: Justin Payne <justin.payne@fda.hhs.gov>
version 1.0
import "identify.wdl" as identify
struct AlignmentCoordinate {
String? contig
Int ref_start
Int ref_end
Int? gene_length
Int? contig_start
Int? contig_end
Int? alignment_length
}
struct ResistanceGene {
Tool tool
String gene
Float? identity
Int? alignmentLength
Int? geneLength
Float? coverage
AlignmentCoordinate? coordinates
String phenotype
String accession
}
task resfinder {
input {
Array[File] fastqs
File? fasta
Boolean? nanopore
String? species
}
command <<<
python -m resfinder --version 2>&1 > ver
python -m resfinder -acq ~{if defined(nanopore) then "--nanopore" else ""} ~{if defined(species) then "-s " + "\"" + species + "\"" else ""} ~{if defined(fasta) then "-ifa " + fasta else "-ifq "}~{sep=" " fastqs} -j /out/resfinder.json -o /out/
>>>
output {
String ver = read_string("ver")
Array[String] results = read_tsv("/out/ResFinder_results_tab.txt")
}
runtime {
docker: "genomicepidemiology/resfinder:latest"
}
}
task parse_resfinder {
input {
Array[String] resfinder_results
String ver = "unspecified version"
}
command <<<
python <<CODE
import json
import csv
ver = "~{ver}"
tool = dict(
name='ResFinder',
version=f"{'.v ' if ver != 'unspecified version' else ''}{ver}"
)
with open("~{write_lines(resfinder_results)}", 'r') as f:
reader = csv.DictReader(f, delimiter='\t')
genes = []
for row in reader:
gene = row['Resistance gene']
identity = float(row['Identity'])
alignment_length, gene_length = map(int, row['Alignment Length/Gene Length'].split('/'))
coverage = float(row['Coverage'])
contig = row['Contig']
ref_start, ref_end = map(int, row['Position in reference'].split('..'))
contig_start, contig_end = map(int, row['Position in contig'].split('..'))
phenotype = row['Phenotype']
accession = row['Accession no.']
genes.append({
'tool': tool,
'gene': gene,
'identity': identity,
'alignmentLength': alignment_length,
'geneLength': gene_length,
'coverage': coverage,
'coordinates': {
'contig': contig,
'ref_start': ref_start,
'ref_end': ref_end,
'contig_start': contig_start,
'contig_end': contig_end,
'alignment_length': contig_end - contig_start
},
'phenotype': phenotype,
'accession': accession
})
print(json.dumps(genes))
CODE
>>>
output {
Array[ResistanceGene] resistanceGenes = read_json(stdout())
}
runtime {
docker: "python:3.12"
}
}