galaxyproject · bgruening · Mar 4, 2023 · Jul 9, 2021 · Jul 9, 2021 · Jul 9, 2021
diff --git a/tools/picrust2/.shed.yml b/tools/picrust2/.shed.yml
@@ -0,0 +1,19 @@
+# Tool Shed repository metadata for the PICRUSt2 wrappers in this directory.
+name: picrust2
+owner: iuc
+type: unrestricted
+description: 'PICRUSt2: Phylogenetic Investigation of Communities by Reconstruction of Unobserved States'
+long_description: >
+    PICRUSt2 is a software for predicting functional abundances based only on marker gene sequences.
+homepage_url: https://github.com/picrust/picrust2/wiki
+remote_repository_url: https://github.com/picrust/picrust2
+categories: [Metagenomics]
+# Name/description templates for the automatically generated tool repositories.
+auto_tool_repositories:
+  name_template: '{{ tool_id }}'
+  description_template: 'Wrapper for the PICRUSt2 tool suite: {{ tool_name }}'
+suite:
+  name: suite_picrust2
+  description: 'PICRUSt2: Phylogenetic Investigation of Communities by Reconstruction of Unobserved States'
+  long_description: >
+    PICRUSt2 is a software for predicting functional abundances based only on marker gene sequences.
diff --git a/tools/picrust2/add_descriptions.xml b/tools/picrust2/add_descriptions.xml
@@ -0,0 +1,100 @@
+<tool id="picrust2_add_descriptions" name="PICRUSt2 Add descriptions" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>column to a function abundance table</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="bio_tool"/>
+    <expand macro="requirements"/>
+    <version_command>add_descriptions.py -v</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+## Exactly one of --map_type / --custom_map_table is passed, depending on the map_file conditional.
+add_descriptions.py
+    --input '$input'
+    --output '$func_abun_table_description'
+#if $map_file.selector == "default"
+    --map_type '$map_file.map_type'
+#else if $map_file.selector == "custom"
+    --custom_map_table '$map_file.custom_map_table'
+#end if
+    ]]></command>
+    <inputs>
+        <param argument="--input" type="data" format="tabular" label="Input function (gene family or pathway) abundance table"/>
+        <conditional name="map_file">
+            <param name="selector" type="select" label="Mapping table used to look up function descriptions">
+                <option value="default" selected="true">Default mapping file</option>
+                <option value="custom">Customized mapping file</option>
+            </param>
+            <when value="default">
+                <param argument="--map_type" type="select" label="Mapping table to use">
+                    <option value="COG">Clusters of Orthologous Genes database (COG)</option>
+                    <option value="EC">Enzyme Commission number database (EC number)</option>
+                    <option value="KO">KEGG Orthology database (KO)</option>
+                    <option value="PFAM">Pfam database</option>
+                    <option value="TIGRFAM">TIGRFAM database</option>
+                    <option value="METACYC">Metabolic Pathway database (MetaCyc)</option>
+                </param>
+            </when>
+            <when value="custom">
+                <param argument="--custom_map_table" type="data" format="tabular" label="Custom mapping table" help="A table with at least two columns linking function ids to descriptions for each function">
+                    <validator type="dataset_metadata_in_range" metadata_name="columns" min="2" message="Input needs to have at least two columns"/>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- The command writes directly to this dataset path (output option), so nothing has to be collected from the working directory. -->
+        <data name="func_abun_table_description" format="tabular"/>
+    </outputs>
+    <tests>
+        <!-- Default mapping table (EC) -->
+        <test expect_num_outputs="1">
+            <param name="input" value="pred_metagenome_unstrat.tsv.gz"/>
+            <conditional name="map_file">
+                <param name="selector" value="default"/>
+                <param name="map_type" value="EC"/>
+            </conditional>
+            <output name="func_abun_table_description">
+                <assert_contents>
+                    <has_text text="description"/>
+                    <has_n_lines n="1000"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Customized mapping table supplied as a dataset -->
+        <test expect_num_outputs="1">
+            <param name="input" value="pred_metagenome_unstrat.tsv.gz"/>
+            <conditional name="map_file">
+                <param name="selector" value="custom"/>
+                <param name="custom_map_table" value="ec_unstrat_test.txt.gz"/>
+            </conditional>
+            <output name="func_abun_table_description">
+                <assert_contents>
+                    <has_text text="description"/>
+                    <has_n_lines n="1000"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+Add Descriptions
+================
+Adds a description column to a function abundance table and outputs a new file.
+
+Note
+====
+The user needs to specify the input file and what type of functions are in the input table, either by choosing one of the default mapping tables or by providing a customized mapping table. The tool will throw an error if no ids overlap; otherwise it fills in "not_found" for the description of ids in the function table that are not in the mapping file.
+
+Input
+=====
+Input function abundance table and, when the customized mapping file is selected, a mapping table with at least two columns linking function ids to descriptions.
+
+Output
+======
+Output function abundance table with added description column.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1038/s41587-020-0548-6</citation>
+    </citations>
+</tool>
diff --git a/tools/picrust2/hsp.xml b/tools/picrust2/hsp.xml
@@ -0,0 +1,124 @@
+<tool id="picrust2_hsp" name="PICRUSt2 Hidden state prediction (HSP)" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>to predict gene family abundances</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="bio_tool"/>
+    <expand macro="requirements"/>
+    <version_command>hsp.py -v</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+@VAR_ACCESS_FOO@
+hsp.py
+    --tree '$tree'
+    @HSP_PARAMS@
+    --output '$prediction_output'
+    $check
+    ## not implemented --chunk_size hoping that the default is carefully chosen
+    ## otherwise one might need to compute a "good" value from the number of
+    ## entries in the trait table and the number of available processors
+    -p "\${GALAXY_SLOTS:-1}"
+    ]]></command>
+    <inputs>
+        <param argument="--tree" type="data" format="newick" label="Newick tree with study sequences placed amongst reference sequences" help="The full reference tree containing both study sequences (i.e. ASVs or OTUs) and reference sequences."/>
+        <expand macro="hsp_params" nsti_truevalue="--calculate_NSTI" nsti_checked="false" in_trait_arg="--in_trait" in_trait_multiple="false" in_trait_label_suff="">
+            <token name="add_default_traits">
+                <option value="16S">16S</option>
+                <option value="PHENO">PHENO</option>
+            </token>
+            <token name="custom_traits">
+                <param argument="--observed_trait_table" type="data" format="tabular" label="Customized trait table" help="Describes directly observed traits (e.g. sequenced genomes) in tab-delimited format."/>
+            </token>
+            <!-- the seed parameter (for the emp_prob option) is added here because it is absent in picrust2_pipeline -->
+            <param argument="--seed" type="integer" value="100" label="Seed to make output reproducible" help="A fixed seed is necessary to get reproducible results with the emp_prob method"/>
+        </expand>
+        <param argument="--check" type="boolean" truevalue="--check" falsevalue="" checked="false" label="Check input trait table before HSP"/>
+    </inputs>
+    <outputs>
+        <data name="prediction_output" format="tabular"/>
+    </outputs>
+    <tests>
+        <!-- NOTE(review): the tree input is named out_tree.zip but declared as newick; confirm that the test-data file is a plain Newick tree. -->
+        <!-- Default 16S trait table, mp method -->
+        <test expect_num_outputs="1">
+            <param name="tree" ftype="newick" value="out_tree.zip"/>
+            <conditional name="trait_input">
+                <param name="selector" value="default"/>
+                <param name="in_trait" value="16S"/>
+            </conditional>
+            <conditional name="hsp_method__options">
+                <param name="hsp_method" value="mp"/>
+                <param name="edge_exponent" value="0.0"/>
+            </conditional>
+            <param name="calculate_NSTI" value="false"/>
+            <param name="check" value="false"/>
+            <output name="prediction_output" ftype="tabular">
+                <assert_contents>
+                    <has_text text="sequence"/>
+                    <has_n_lines n="38"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Customized trait table, mp method -->
+        <test expect_num_outputs="1">
+            <param name="tree" ftype="newick" value="out_tree.zip"/>
+            <conditional name="trait_input">
+                <param name="selector" value="custom"/>
+                <param name="observed_trait_table" value="known_traits.tsv.gz"/>
+            </conditional>
+            <conditional name="hsp_method__options">
+                <param name="hsp_method" value="mp"/>
+                <param name="edge_exponent" value="0.0"/>
+            </conditional>
+            <param name="calculate_NSTI" value="false"/>
+            <param name="check" value="false"/>
+            <output name="prediction_output" ftype="tabular">
+                <assert_contents>
+                    <has_text text="2040502012"/>
+                    <has_n_lines n="20005"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Default 16S trait table, emp_prob method with a fixed seed -->
+        <test expect_num_outputs="1">
+            <param name="tree" ftype="newick" value="out_tree.zip"/>
+            <conditional name="trait_input">
+                <param name="selector" value="default"/>
+                <param name="in_trait" value="16S"/>
+            </conditional>
+            <conditional name="hsp_method__options">
+                <param name="hsp_method" value="emp_prob"/>
+                <param name="seed" value="100"/>
+            </conditional>
+            <param name="calculate_NSTI" value="false"/>
+            <param name="check" value="false"/>
+            <output name="prediction_output" ftype="tabular">
+                <assert_contents>
+                    <has_text text="sequence"/>
+                    <has_n_lines n="38"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+Hidden State Prediction (HSP)
+=============================
+Performs hidden state prediction on tips in the input tree with unknown trait values. Typically this script is used to predict the copy number of gene families present in the predicted genome for each amplicon sequence variant, given a tree and a set of known trait values. This script outputs a table of trait predictions.
+
+Note
+====
+Run hidden-state prediction (hsp) to predict gene family abundances. The chunk size is left at the hsp.py default and the number of processes is taken from the Galaxy job configuration. A seed is necessary to make the output of the emp_prob method reproducible.
+
+Input
+=====
+Newick tree with study sequences placed amongst reference sequences, plus either one of the default trait tables or a customized trait table describing directly observed traits in tab-delimited format.
+
+Output
+======
+Tabular file containing predicted counts.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1038/s41587-020-0548-6</citation>
+    </citations>
+</tool>