-
Notifications
You must be signed in to change notification settings - Fork 80
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[MRG] add
sourmash sig check
for comparing picklists and databases (#…
…1907) * copy sig check code over * add --fail to sig check * require manifests etc * test no --picklist * test nosave manifest * test __iadd__ for manifests * remove 'add_to_found' * revert * simplify per @bluegenes
- Loading branch information
Showing
12 changed files
with
592 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
"""check signature collections against a picklist""" | ||
|
||
# Usage text passed as `usage=` to add_parser('check', ...) in subparser().
usage=""" | ||
sourmash sig check <filenames> --picklist ... -o miss.csv -m manifest.csv | ||
This will check the signature contents of <filenames> against the given | ||
picklist, optionally outputting the unmatched picklist rows to 'miss.csv' | ||
and optionally outputting a manifest of the matched signatures to | ||
'manifest.csv'. | ||
By default, 'sig check' requires a pre-existing manifest for collections; | ||
this prevents potentially slow manifest rebuilding. You | ||
can turn this check off with '--no-require-manifest'. | ||
""" | ||
|
||
from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, | ||
add_picklist_args, add_pattern_args) | ||
|
||
|
||
def subparser(subparsers):
    """Register the 'check' subcommand and its command-line arguments.

    Creates a 'check' parser on `subparsers` using the module-level `usage`
    text, adds the arguments listed below in order, and then applies the
    shared ksize / moltype / pattern / picklist argument helpers imported
    from sourmash.cli.utils.
    """
    parser = subparsers.add_parser('check', usage=usage)

    # (flags, keyword arguments) for each add_argument() call; the order
    # here is the order in which the arguments are registered.
    argument_specs = (
        (('signatures',), {'nargs': '*'}),
        (('-q', '--quiet'), {
            'action': 'store_true',
            'help': 'suppress non-error output',
        }),
        (('-o', '--output-missing'), {
            'metavar': 'FILE',
            'help': 'output picklist with remaining unmatched entries to this file',
        }),
        (('-f', '--force'), {
            'action': 'store_true',
            'help': 'try to load all files as signatures',
        }),
        (('--from-file',), {
            'help': 'a text file containing a list of files to load signatures from',
        }),
        (('-m', '--save-manifest-matching'), {
            'help': 'save a manifest of the matching entries to this file.',
        }),
        (('--fail-if-missing',), {
            'action': 'store_true',
            'help': 'exit with an error code (-1) if there are any missing picklist values.',
        }),
        (('--no-require-manifest',), {
            'action': 'store_true',
            'help': 'do not require a manifest; generate dynamically if needed',
        }),
    )
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)

    # Shared argument groups; add_ksize_arg is called with 31 as its
    # second argument, exactly as before.
    add_ksize_arg(parser, 31)
    for add_shared_args in (add_moltype_args, add_pattern_args,
                            add_picklist_args):
        add_shared_args(parser)
|
||
|
||
def main(args):
    """Run the 'check' command by delegating to sourmash.sig.__main__.check.

    `args` is passed through unchanged, and whatever check() returns is
    returned to the caller.
    """
    # sourmash is imported inside the function, matching the original.
    import sourmash

    check_command = sourmash.sig.__main__.check
    return check_command(args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
name2 | ||
"NOT THERE" | ||
"NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome" | ||
"NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome" | ||
"NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome" | ||
"NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome" | ||
"NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome" | ||
"NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome" | ||
"NC_006905.1 Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67, complete genome" | ||
"NC_006905.1 Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67, complete genome" | ||
"NC_006905.1 Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67, complete genome" | ||
NC_011294.1 Salmonella enterica subsp. enterica serovar Enteritidis str. P125109 complete genome | ||
NC_011294.1 Salmonella enterica subsp. enterica serovar Enteritidis str. P125109 complete genome | ||
NC_011294.1 Salmonella enterica subsp. enterica serovar Enteritidis str. P125109 complete genome | ||
NC_011274.1 Salmonella enterica subsp. enterica serovar Gallinarum str. 287/91 complete genome | ||
NC_011274.1 Salmonella enterica subsp. enterica serovar Gallinarum str. 287/91 complete genome | ||
NC_011274.1 Salmonella enterica subsp. enterica serovar Gallinarum str. 287/91 complete genome | ||
"NC_006511.1 Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150, complete genome" | ||
"NC_006511.1 Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150, complete genome" | ||
"NC_006511.1 Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150, complete genome" | ||
"NC_011080.1 Salmonella enterica subsp. enterica serovar Newport str. SL254, complete genome" | ||
"NC_011080.1 Salmonella enterica subsp. enterica serovar Newport str. SL254, complete genome" | ||
"NC_011080.1 Salmonella enterica subsp. enterica serovar Newport str. SL254, complete genome" | ||
"NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome" | ||
"NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome" | ||
"NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# SOURMASH-MANIFEST-VERSION: 1.0 | ||
internal_location,md5,md5short,ksize,moltype,num,scaled,n_hashes,with_abundance,name,filename | ||
6d6e87e1154e95b279e5e7db414bc37b,6d6e87e1154e95b279e5e7db414bc37b,6d6e87e1,31,DNA,500,0,500,0,,SRR2255622_1.fastq.gz | ||
60f7e23c24a8d94791cc7a8680c493f9,60f7e23c24a8d94791cc7a8680c493f9,60f7e23c,31,DNA,500,0,500,0,,SRR2060939_1.fastq.gz | ||
0107d767a345eff67ecdaed2ee5cd7ba,0107d767a345eff67ecdaed2ee5cd7ba,0107d767,31,DNA,500,0,500,0,,SRR453566_1.fastq.gz | ||
f71e78178af9e45e6f1d87a0c53c465c,f71e78178af9e45e6f1d87a0c53c465c,f71e7817,31,DNA,500,0,500,0,,SRR2241509_1.fastq.gz | ||
f0c834bc306651d2b9321fb21d3e8d8f,f0c834bc306651d2b9321fb21d3e8d8f,f0c834bc,31,DNA,500,0,500,0,,SRR453569_1.fastq.gz | ||
4e94e60265e04f0763142e20b52c0da1,4e94e60265e04f0763142e20b52c0da1,4e94e602,31,DNA,500,0,500,0,,SRR2060939_2.fastq.gz | ||
b59473c94ff2889eca5d7165936e64b3,b59473c94ff2889eca5d7165936e64b3,b59473c9,31,DNA,500,0,500,0,,SRR453570_1.fastq.gz |
Oops, something went wrong.