-
Notifications
You must be signed in to change notification settings - Fork 441
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into topic/reusable-workflows
- Loading branch information
Showing
344 changed files
with
187,205 additions
and
26,426 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
name: data_manager_semibin | ||
owner: iuc | ||
description: "SemiBin: Semi-supervised Metagenomic Binning Using Siamese Neural Networks" | ||
homepage_url: https://semibin.readthedocs.io/en/latest/ | ||
long_description: | | ||
Command tool for metagenomic binning with semi-supervised deep learning using information from reference genomes | ||
remote_repository_url: "https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin" | ||
type: unrestricted | ||
categories: | ||
- Data Managers |
153 changes: 153 additions & 0 deletions
153
data_managers/data_manager_semibin/data_manager/data_manager_semibin.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
#!/usr/bin/env python | ||
# | ||
# Data manager for reference data for the SemiBin Galaxy tools | ||
import argparse | ||
import json | ||
import subprocess | ||
from datetime import date | ||
from pathlib import Path | ||
|
||
|
||
# Utility functions for interacting with Galaxy JSON | ||
def read_input_json(json_fp):
    """Load the JSON parameter file supplied by the data manager tool

    Returns a tuple (param_dict,extra_files_path):

    - 'param_dict' is the dictionary stored under the
      'param_dict' key of the JSON document, i.e. the
      parameters that were input into the tool;
    - 'extra_files_path' is a Path built from the
      'extra_files_path' value of the first 'output_data'
      item; output files must be placed in that directory
      for the receiving data manager to pick them up.

    NB this function does not create the directory named by
    'extra_files_path'; it doesn't exist initially, so the
    calling script has to create it when needed.
    """
    with open(json_fp) as handle:
        payload = json.load(handle)
    # Look up 'param_dict' first so that a malformed document fails on
    # the same key as before
    param_dict = payload['param_dict']
    first_output = payload['output_data'][0]
    extra_files_dp = Path(first_output['extra_files_path'])
    return param_dict, extra_files_dp
|
||
|
||
# Utility functions for creating data table dictionaries | ||
# | ||
# Example usage: | ||
# >>> d = create_data_tables_dict() | ||
# >>> add_data_table(d,'my_data') | ||
# >>> add_data_table_entry(d,'my_data',dict(dbkey='hg19',value='human')) | ||
# >>> add_data_table_entry(d,'my_data',dict(dbkey='mm9',value='mouse')) | ||
# >>> print(json.dumps(d)) | ||
def create_data_tables_dict():
    """Build an empty container for data table information

    The returned dictionary has a single key, 'data_tables',
    mapped to an empty dictionary. It is meant to be filled in
    with 'add_data_table' and 'add_data_table_entry' and then
    serialised to JSON to be sent back to the data manager.
    A fresh dictionary is returned on every call.
    """
    return dict(data_tables=dict())
|
||
|
||
def add_data_table(d, table):
    """Register an empty data table in the data tables dictionary

    Stores an empty list under the name 'table' inside
    d['data_tables']. If a table with that name is already
    present its entries are replaced by the new empty list.
    """
    tables = d['data_tables']
    tables[table] = list()
|
||
|
||
def add_data_table_entry(d, table, entry):
    """Append one row to an existing data table

    'entry' should be a dictionary whose keys are the names
    of the columns in the data table 'table'; it is appended
    to the list of entries held for that table.

    Raises an exception if the lookup of the named data table
    in 'd' fails with a KeyError, i.e. the table doesn't exist.
    """
    try:
        rows = d['data_tables'][table]
    except KeyError:
        raise Exception("add_data_table_entry: no table '%s'" % table)
    rows.append(entry)
|
||
|
||
def download_gtdb(data_tables, table_name, target_dp, test=False):
    """Download the GTDB reference data for SemiBin and register it

    Unless 'test' is set, runs 'SemiBin download_GTDB' with
    'target_dp' as its reference database directory. In test
    mode nothing is downloaded; an empty placeholder file
    called 'empty' is created inside 'target_dp' instead.

    In both cases a single entry is then appended to the data
    table 'table_name', with the columns 'dbkey', 'value'
    (today's date formatted as DDMMYYYY), 'name' and 'path'
    (the target directory).

    The 'data_tables' dictionary should have been created using
    the 'create_data_tables_dict' and 'add_data_table' functions.

    Arguments:
      data_tables: a dictionary containing the data table info
      table_name: name of the table to add the entry to
      target_dp: directory (Path or str) that receives the
        database; the placeholder file is created directly in
        it, so in test mode it has to exist already
      test: if True, skip the download and register a dummy
        'test' entry

    Raises:
      subprocess.CalledProcessError: SemiBin exited with a
        non-zero status
      FileNotFoundError: the SemiBin executable could not be
        found
    """
    db_dp = Path(target_dp)
    db_path = str(target_dp)
    if test:
        dbkey = 'test'
        name = "Test"
        # Placeholder so the target directory has some content
        (db_dp / "empty").touch()
    else:
        # Pass an argument list and no shell: the path reaches SemiBin
        # verbatim even if it contains spaces or shell metacharacters
        subprocess.check_call(
            ["SemiBin", "download_GTDB", "--reference-db-data-dir", db_path])
        dbkey = 'gtdb'
        name = "GTDB reference genome generated by MMseqs2 used in SemiBin"
    add_data_table_entry(
        data_tables,
        table_name,
        dict(
            dbkey=dbkey,
            value=date.today().strftime("%d%m%Y"),
            name=name,
            path=db_path))
|
||
|
||
if __name__ == "__main__":
    # Script entry point: read the JSON file supplied via --json,
    # populate the target directory and overwrite that same JSON file
    # with the resulting data table entries.
    print("Starting...")

    # Read command line
    parser = argparse.ArgumentParser(description='Download reference genomes (GTDB)')
    # --json is mandatory: the file is both the input parameter file
    # and the place the result is written to. Without it the script
    # used to crash with a TypeError when building the Path.
    parser.add_argument('--json', required=True, help="Path to JSON file")
    parser.add_argument('--test', action='store_true', help="Test")
    args = parser.parse_args()
    print("args   : %s" % args)

    # Read the input JSON
    json_fp = Path(args.json)
    params, target_dp = read_input_json(json_fp)

    # Make the target directory (it does not exist initially)
    print("Making %s" % target_dp)
    target_dp.mkdir(parents=True, exist_ok=True)

    # Set up data tables dictionary
    data_tables = create_data_tables_dict()
    add_data_table(data_tables, "gtdb")

    # Fetch data from specified data sources
    print("Download and build database")
    download_gtdb(
        data_tables,
        "gtdb",
        target_dp,
        args.test)

    # Write output JSON (replaces the contents of the input file)
    print("Outputting JSON")
    with open(json_fp, 'w') as fh:
        json.dump(data_tables, fh, sort_keys=True)
    print("Done.")
39 changes: 39 additions & 0 deletions
39
data_managers/data_manager_semibin/data_manager/data_manager_semibin.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
<tool id="data_manager_semibin" name="Download reference genomes (GTDB) for SemiBin" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | ||
<description></description> | ||
<requirements> | ||
<requirement type="package" version="@TOOL_VERSION@">semibin</requirement> | ||
</requirements> | ||
<macros> | ||
<token name="@TOOL_VERSION@">1.1.1</token> | ||
<token name="@VERSION_SUFFIX@">0</token> | ||
<token name="@PROFILE@">21.01</token> | ||
</macros> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
python '$__tool_directory__/data_manager_semibin.py' | ||
--json '$out_file' | ||
$type | ||
]]></command> | ||
<inputs> | ||
<param name="type" type="hidden" value="" /> | ||
</inputs> | ||
<outputs> | ||
<data name="out_file" format="data_manager_json"/> | ||
</outputs> | ||
<tests> | ||
<test expect_num_outputs="1"> | ||
<param name="type" value="--test" /> | ||
<output name="out_file"> | ||
<assert_contents> | ||
<has_text text="test"/> | ||
<has_text text="Test"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
<help><![CDATA[ | ||
This tool downloads reference genomes (GTDB) for SemiBin | ||
]]></help> | ||
<citations> | ||
<citation type="doi">10.1038/s41467-022-29843-y</citation> | ||
</citations> | ||
</tool> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
<?xml version="1.0"?> | ||
<data_managers> | ||
<data_manager tool_file="data_manager/data_manager_semibin.xml" id="data_manager_semibin" > | ||
<data_table name="gtdb"> <!-- Defines a Data Table to be modified. --> | ||
<output> <!-- Handle the output of the Data Manager Tool --> | ||
<column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> | ||
<column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> | ||
<column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool --> | ||
<column name="path" output_ref="out_file" > | ||
<move type="directory"> | ||
<source>${path}</source> | ||
<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">semibin/data/${dbkey}</target> | ||
</move> | ||
<value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/semibin/data/${dbkey}</value_translation> | ||
<value_translation type="function">abspath</value_translation> | ||
</column> | ||
</output> | ||
</data_table> | ||
</data_manager> | ||
</data_managers> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#file has this format (white space characters are TAB characters) | ||
#value name dbkey /path/to/data | ||
test-date description test /path/to/data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#file has this format (white space characters are TAB characters) | ||
#value name dbkey /path/to/data |
6 changes: 6 additions & 0 deletions
6
data_managers/data_manager_semibin/tool_data_table_conf.xml.sample
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
<tables> | ||
<table name="gtdb" comment_char="#"> | ||
<columns>value, name, dbkey, path</columns> | ||
<file path="tool-data/gtdb.loc" /> | ||
</table> | ||
</tables> |
6 changes: 6 additions & 0 deletions
6
data_managers/data_manager_semibin/tool_data_table_conf.xml.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
<tables> | ||
<table name="gtdb" comment_char="#"> | ||
<columns>value, name, dbkey, path</columns> | ||
<file path="${__HERE__}/test-data/gtdb.loc" /> | ||
</table> | ||
</tables> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.