-
Notifications
You must be signed in to change notification settings - Fork 0
/
mQC.xml
executable file
·156 lines (140 loc) · 7.56 KB
/
mQC.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
<tool id="mqc" name="mQC" version="1.10">
<description>quality control of ribosome profiling mapping results</description>
<!-- Runtime dependency resolved by Galaxy: the "mqc" package at version 1.10. -->
<requirements>
<requirement type="package" version="1.10">mqc</requirement>
</requirements>
<!--
  Cheetah template that builds the mQC.pl command line.
  Every substituted value is wrapped in single quotes so that values
  containing spaces or shell metacharacters (notably the free-text
  experiment name and dataset/output paths) reach mQC.pl as a single
  argument instead of being split or interpreted by the shell.
-->
<command><![CDATA[
mQC.pl
--galaxy Y
--galaxytest '$galaxytest'
--experiment_name '$exp_name'
--cores '$cores'
--species '$species'
--ens_v '$ensv'
--unique '$uniq'
--mapper '$mapper'
--maxmultimap '$maxmultimap'
--ens_db "get"
--offset '$offsetSelect.offset'
--min_length_gd '$min_l_gd'
--max_length_gd '$max_l_gd'
--plotrpftool '$plotrpftool'
--outhtml '$MappingQC_output'
--outfolder '$MappingQC_output.files_path'
## Alignment input: both SAM and BAM datasets are passed through --samfile;
## --galaxysam carries which of the two formats was selected.
#if $inputtypeSelect.inputtype == "sam"
--samfile '$inputtypeSelect.samfile'
--galaxysam Y
#else if $inputtypeSelect.inputtype == "bam"
--samfile '$inputtypeSelect.bamfile'
--galaxysam N
#end if
## Extra arguments that depend on the chosen P-site offset source
## ("standard" needs none).
#if $offsetSelect.offset == "from_file"
--offset_file '$offsetSelect.offsetFile'
#else if $offsetSelect.offset == "plastid"
## For SAM input the literal "convert" is passed instead of a BAM path.
#if $inputtypeSelect.inputtype == "sam"
--plastid_bam "convert"
#else if $inputtypeSelect.inputtype == "bam"
--plastid_bam '$inputtypeSelect.bamfile'
#end if
--min_length_plastid '$offsetSelect.min_l_plastid'
--max_length_plastid '$offsetSelect.max_l_plastid'
#end if
]]></command>
<inputs>
    <!-- Hidden flag forwarded as "galaxytest"; the functional test overrides it with "Y". -->
    <param name="galaxytest" type="hidden" value="N" />

    <!-- Alignment input: exactly one of samfile / bamfile exists, depending on the selected format. -->
    <conditional name="inputtypeSelect">
        <param name="inputtype" type="select" label="Select the format of your input data">
            <option value="sam" selected="true">SAM format</option>
            <option value="bam">BAM format</option>
        </param>
        <when value="sam">
            <param name="samfile" type="data" format="sam" label="Choose the sam file on which you want to do quality control"/>
        </when>
        <when value="bam">
            <param name="bamfile" type="data" format="bam" label="Choose the bam file on which you want to do quality control"/>
        </when>
    </conditional>

    <!-- An empty name would leave the experiment_name option on the command line without a value, so reject it up front. -->
    <param name="exp_name" type="text" label="Enter the experiment name">
        <validator type="empty_field" message="Please enter an experiment name"/>
    </param>

    <!-- min="1": zero or negative core counts are rejected in the form. -->
    <param name="cores" type="integer" size="5" value="5" min="1" label="Enter the number of cores to run the mapping quality control"/>

    <param name="species" type="select" label="Select the species">
        <option value="mouse">Mouse</option>
        <option value="human" selected="true">Human</option>
        <option value="fruitfly">Fruitfly</option>
    </param>
    <param name="ensv" type="integer" size="5" value="86" label="Enter the Ensembl database version"/>

    <param name="uniq" type="select" label="Perform mQC with unique mapped reads only">
        <option value="Y" selected="true">Yes</option>
        <option value="N">No</option>
    </param>
    <param name="plotrpftool" type="select" label="Choose the style of the RPF phase plot">
        <option value="grouped2D" selected="true">Grouped 2D bar chart</option>
        <option value="pyplot3D">3D bar chart (may suffer from Escher effects)</option>
    </param>
    <param name="mapper" type="select" label="Which mapper is used to generate your SAM file?">
        <option value="STAR" selected="true">STAR</option>
        <option value="TopHat2">TopHat2</option>
        <option value="HiSat2">HiSat2</option>
    </param>
    <param name="maxmultimap" value="16" type="integer" label="Only use alignments that mapped fewer times than this value"/>

    <!-- RPF length window for the gene distribution; lengths below 1 are rejected in the form. -->
    <param name="min_l_gd" value="26" type="integer" min="1" label="Enter the minimum RPF length for gene distribution construction"/>
    <param name="max_l_gd" value="34" type="integer" min="1" label="Enter the maximum RPF length for gene distribution construction"/>

    <!-- P-site offset source; only the "from_file" and "plastid" branches add parameters. -->
    <conditional name="offsetSelect">
        <param name="offset" type="select" label="The way P-site offsets were determined">
            <option value="standard" selected="true">Standard offsets</option>
            <option value="plastid">Calculate offsets with plastid</option>
            <option value="from_file">Offsets from custom file</option>
        </param>
        <when value="standard"/>
        <when value="from_file">
            <!-- format="txt" (which also matches tabular datasets) because the label allows space-separated files. -->
            <param name="offsetFile" type="data" format="txt" label="Choose a text file with the offsets. Each line should be structured as RPF length - offset, separated by a tab or a space" />
        </when>
        <when value="plastid">
            <param name="min_l_plastid" value="22" type="integer" min="1" label="Enter the minimum RPF length for plastid offset calculation"/>
            <param name="max_l_plastid" value="34" type="integer" min="1" label="Enter the maximum RPF length for plastid offset calculation"/>
        </when>
    </conditional>
</inputs>
<!-- Single HTML dataset. The command passes this dataset as the outhtml option and its files_path directory as the outfolder option. -->
<outputs>
<data format="html" name="MappingQC_output" label="HTML output" />
</outputs>
<!-- Functional test: mouse SAM input, standard offsets, compared against a stored HTML report. -->
<tests>
    <test>
        <!-- Overrides the hidden galaxytest flag (default "N"). -->
        <param name="galaxytest" value="Y"/>

        <!-- Alignment input -->
        <param name="inputtype" value="sam"/>
        <param name="samfile" value="test_data_mESC.sam" ftype="sam"/>

        <!-- General run settings -->
        <param name="exp_name" value="test_data_mESC"/>
        <param name="cores" value="10"/>
        <param name="species" value="mouse"/>
        <param name="ensv" value="86"/>
        <param name="uniq" value="Y"/>
        <param name="plotrpftool" value="grouped2D"/>
        <param name="mapper" value="STAR"/>
        <param name="maxmultimap" value="16"/>

        <!-- RPF length window and offset source -->
        <param name="min_l_gd" value="26"/>
        <param name="max_l_gd" value="34"/>
        <param name="offset" value="standard"/>

        <!-- Expected report -->
        <output name="MappingQC_output" file="mQC_test_data_mESC/mQC_test_data_mESC.html" ftype="html"/>
    </test>
</tests>
<help>
.. class:: infomark
**What it does**
MappingQC is a tool to easily generate figures that give a clear overview of the mapping quality of ribosome profiling data. More specifically, it gives an overview of the P-site offset calculation, the gene distribution and the metagenic classification. Furthermore, MappingQC performs a thorough analysis of the triplet periodicity and the linked triplet phase (typical for ribosome profiling) in the canonical transcript of your data. In particular, the links between the phase distribution and the RPF length, the relative sequence position and the triplet identity are taken into account.
There is also a version of MappingQC available that is directly linked to PROTEOFORMER and its SQLite results database structure. This makes its usage in combination with PROTEOFORMER more straightforward and can speed up the total analysis process.
-----
.. class:: infomark
**Input**
1) SAM or BAM file with mapping results
2) Ensembl DB with annotation for the selected species and Ensembl version
------
.. class:: infomark
**Output**
1) HTML file: overview file of the tables and figures. Main output of MappingQC
2) ZIP file: contains a folder with all necessary figures. In this folder you can also find the overview HTML file.
</help>
<!-- NOTE(review): the BibTeX key ends in 2017 while the year field is 2018 and the journal is "Unpublished"; confirm and update once the final reference is known. -->
<citations>
    <citation type="bibtex">
@article{verbruggen_menschaert_2017,
    title={mQC: An essential quality visualization tool for ribosome profiling},
    journal={Unpublished},
    author={Verbruggen, Steven and Menschaert, Gerben},
    year={2018}
}
    </citation>
</citations>
</tool>