From a133e68b8e4d98cfe952923c3b741aabe132f5f7 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 4 Jun 2024 11:52:10 -0700 Subject: [PATCH] MRG: add `--set-name` to `sig intersect` and `sig subtract` (#3162) **Note:** PR into #3161 This PR cleans up and regularizes naming output on `sig` subcommands. Specifically, it: - [x] adds `--set-name` to `sig intersect` and `sig subtract` to set names on output signatures. - [x] confirms and tests default names for output from `sig inflate`, `sig filter`, `sig downsample`, and `sig flatten` - [x] updates documentation appropriately - [x] aliases `--name` to `--set-name` on `sig merge`, and nominates `--name` for deprecation and removal on v5 - [x] nominates `--name` on `sig extract`, `sig filter`, and `sig flatten` for deprecation and removal on v5 - [x] highlights `--name` on `sketch dna`, `sketch protein`, and `sketch translate` - ref https://github.com/sourmash-bio/sourmash/issues/3152 Fixes https://github.com/sourmash-bio/sourmash/issues/1801 Fixes https://github.com/sourmash-bio/sourmash/issues/3152 Related issues: * https://github.com/sourmash-bio/sourmash/issues/3173 --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- doc/command-line.md | 6 ++++ src/sourmash/cli/sig/intersect.py | 1 + src/sourmash/cli/sig/merge.py | 2 +- src/sourmash/cli/sig/subtract.py | 1 + src/sourmash/cli/sketch/dna.py | 7 ++-- src/sourmash/cli/sketch/protein.py | 7 ++-- src/sourmash/cli/sketch/translate.py | 7 ++-- src/sourmash/sig/__main__.py | 7 ++-- tests/test_cmd_signature.py | 53 +++++++++++++++++++++++++--- 9 files changed, 77 insertions(+), 14 deletions(-) diff --git a/doc/command-line.md b/doc/command-line.md index 90633d342e..63ee87d368 100644 --- a/doc/command-line.md +++ b/doc/command-line.md @@ -1625,6 +1625,8 @@ k-mer sizes or molecule types present in any of the signature files, you will need to choose one k-mer size with `-k/--ksize`, and/or one moltype with 
`--dna/--protein/--hp/--dayhoff`. +Use `--set-name <name>` to set the name of the output sketch. + Note: `merge` only creates one output file, with one signature in it. ### `sourmash signature rename` - rename a signature @@ -1661,6 +1663,8 @@ k-mer sizes or molecule types present in any of the signature files, you will need to choose one k-mer size with `-k/--ksize`, and/or one moltype with `--dna/--protein/--hp/--dayhoff`. +Use `--set-name <name>` to set the name of the output sketch. + Note: `subtract` only creates one output file, with one signature in it. ### `sourmash signature intersect` - intersect two (or more) signatures @@ -1686,6 +1690,8 @@ k-mer sizes or molecule types present in any of the signature files, you will need to choose one k-mer size with `-k/--ksize`, and/or one moltype with `--dna/--protein/--hp/--dayhoff`. +Use `--set-name <name>` to set the name of the output sketch(es). + ### `sourmash signature inflate` - transfer abundances from one signature to others Use abundances from one signature to provide abundances on other signatures. 
diff --git a/src/sourmash/cli/sig/intersect.py b/src/sourmash/cli/sig/intersect.py index 521e83f10f..b0c100fc9d 100644 --- a/src/sourmash/cli/sig/intersect.py +++ b/src/sourmash/cli/sig/intersect.py @@ -44,6 +44,7 @@ def subparser(subparsers): metavar="FILE", help="intersect with & take abundances from this signature", ) + subparser.add_argument("--set-name", help="set name for output signature") subparser.add_argument( "-f", "--force", action="store_true", help="try to load all files as signatures" ) diff --git a/src/sourmash/cli/sig/merge.py b/src/sourmash/cli/sig/merge.py index 026749a5f0..cd8bbf1520 100644 --- a/src/sourmash/cli/sig/merge.py +++ b/src/sourmash/cli/sig/merge.py @@ -43,7 +43,7 @@ def subparser(subparsers): subparser.add_argument( "--flatten", action="store_true", help="remove abundances from all signatures" ) - subparser.add_argument("--name", help="rename merged signature") + subparser.add_argument("--set-name", "--name", help="rename merged signature") subparser.add_argument( "-f", "--force", action="store_true", help="try to load all files as signatures" ) diff --git a/src/sourmash/cli/sig/subtract.py b/src/sourmash/cli/sig/subtract.py index 69a349ace3..3a41bb038c 100644 --- a/src/sourmash/cli/sig/subtract.py +++ b/src/sourmash/cli/sig/subtract.py @@ -50,6 +50,7 @@ def subparser(subparsers): metavar="FILE", help="intersect with & take abundances from this signature", ) + subparser.add_argument("--set-name", help="set name for output signature") add_ksize_arg(subparser) add_moltype_args(subparser) diff --git a/src/sourmash/cli/sketch/dna.py b/src/sourmash/cli/sketch/dna.py index 19f6de7509..aa53b7bfa9 100644 --- a/src/sourmash/cli/sketch/dna.py +++ b/src/sourmash/cli/sketch/dna.py @@ -67,12 +67,15 @@ def subparser(subparsers): "-o", "--output", help="output computed signatures to this file" ) file_args.add_argument( - "--merge", + "--set-name", "--name", + "--merge", + dest="merge", type=str, default="", metavar="FILE", - help="merge all input 
files into one signature file with the " "specified name", + help="name the output sketch as specified; note, merges all input " + "files while sketching", ) file_args.add_argument( "--output-dir", diff --git a/src/sourmash/cli/sketch/protein.py b/src/sourmash/cli/sketch/protein.py index 3092d35367..e74c729b2e 100644 --- a/src/sourmash/cli/sketch/protein.py +++ b/src/sourmash/cli/sketch/protein.py @@ -61,12 +61,15 @@ def subparser(subparsers): "--from-file", help="a text file containing a list of sequence files to load" ) file_args.add_argument( - "--merge", + "--set-name", "--name", + "--merge", + dest="merge", type=str, default="", metavar="FILE", - help="merge all input files into one signature file with the " "specified name", + help="name the output sketch as specified; note, merges all input " + "files while sketching", ) file_args.add_argument( "--output-dir", diff --git a/src/sourmash/cli/sketch/translate.py b/src/sourmash/cli/sketch/translate.py index f5bccab46f..2d0f25036f 100644 --- a/src/sourmash/cli/sketch/translate.py +++ b/src/sourmash/cli/sketch/translate.py @@ -67,12 +67,15 @@ def subparser(subparsers): "--from-file", help="a text file containing a list of sequence files to load" ) file_args.add_argument( - "--merge", + "--set-name", "--name", + "--merge", + dest="merge", type=str, default="", metavar="FILE", - help="merge all input files into one signature file with the " "specified name", + help="name the output sketch as specified; note, merges all input " + "files while sketching", ) file_args.add_argument( "--output-dir", diff --git a/src/sourmash/sig/__main__.py b/src/sourmash/sig/__main__.py index 3ec03ed9e0..3dbd86c0a8 100644 --- a/src/sourmash/sig/__main__.py +++ b/src/sourmash/sig/__main__.py @@ -512,7 +512,7 @@ def merge(args): error("no signatures to merge!?") sys.exit(-1) - merged_sigobj = sourmash.SourmashSignature(mh, name=args.name) + merged_sigobj = sourmash.SourmashSignature(mh, name=args.set_name) with 
sourmash_args.SaveSignaturesToLocation(args.output) as save_sigs: save_sigs.add(merged_sigobj) @@ -582,7 +582,8 @@ def intersect(args): intersect_mh = intersect_mh.inflate(abund_sig.minhash) - intersect_sigobj = sourmash.SourmashSignature(intersect_mh) + intersect_sigobj = sourmash.SourmashSignature(intersect_mh, name=args.set_name) + with sourmash_args.SaveSignaturesToLocation(args.output) as save_sigs: save_sigs.add(intersect_sigobj) @@ -703,7 +704,7 @@ def subtract(args): subtract_mh = subtract_mh.inflate(abund_sig.minhash) - subtract_sigobj = sourmash.SourmashSignature(subtract_mh) + subtract_sigobj = sourmash.SourmashSignature(subtract_mh, name=args.set_name) with sourmash_args.SaveSignaturesToLocation(args.output) as save_sigs: save_sigs.add(subtract_sigobj) diff --git a/tests/test_cmd_signature.py b/tests/test_cmd_signature.py index 432881b3b9..6041befbba 100644 --- a/tests/test_cmd_signature.py +++ b/tests/test_cmd_signature.py @@ -332,11 +332,13 @@ def test_sig_filter_1(c): assert len(filtered_sigs) == 2 - mh47 = load_one_signature_from_json(sig47).minhash - mh63 = load_one_signature_from_json(sig63).minhash + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) - assert filtered_sigs[0].minhash == mh47 - assert filtered_sigs[1].minhash == mh63 + assert filtered_sigs[0].minhash == ss47.minhash + assert filtered_sigs[0].name == ss47.name + assert filtered_sigs[1].minhash == ss63.minhash + assert filtered_sigs[1].name == ss63.name @utils.in_tempdir @@ -473,6 +475,27 @@ def test_sig_intersect_1(runtmp): assert actual_intersect_sig.minhash == test_intersect_sig.minhash +def test_sig_intersect_1_rename(runtmp): + # intersect of 47 and 63 should be intersection of mins + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + sig47and63 = utils.get_test_data("47+63-intersect.fa.sig") + runtmp.run_sourmash("sig", "intersect", sig47, sig63, "--set-name", "footest") + + # stdout should be new 
signature + out = runtmp.last_result.out + + test_intersect_sig = load_one_signature_from_json(sig47and63) + actual_intersect_sig = load_one_signature_from_json(out) + + print(test_intersect_sig.minhash) + print(actual_intersect_sig.minhash) + print(out) + + assert actual_intersect_sig.minhash == test_intersect_sig.minhash + assert actual_intersect_sig.name == "footest" + + def test_sig_intersect_1_fromfile_picklist(runtmp): c = runtmp @@ -793,6 +816,26 @@ def test_sig_subtract_1(runtmp): assert set(actual_subtract_sig.minhash.hashes.keys()) == set(mins) +def test_sig_subtract_1_name(runtmp): + # subtract of 63 from 47; rename + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + runtmp.run_sourmash("sig", "subtract", sig47, sig63, "--set-name", "footest") + + # stdout should be new signature + out = runtmp.last_result.out + + test1_sig = load_one_signature_from_json(sig47) + test2_sig = load_one_signature_from_json(sig63) + actual_subtract_sig = load_one_signature_from_json(out) + + mins = set(test1_sig.minhash.hashes.keys()) + mins -= set(test2_sig.minhash.hashes.keys()) + + assert set(actual_subtract_sig.minhash.hashes.keys()) == set(mins) + assert actual_subtract_sig.name == "footest" + + def test_sig_subtract_1_sigzip(runtmp): c = runtmp # subtract of 63 from 47 @@ -3032,6 +3075,7 @@ def test_sig_flatten_1(runtmp): test_flattened = load_one_signature_from_json(sig47) assert test_flattened.minhash == siglist[0].minhash + assert test_flattened.name == siglist[0].name def test_sig_flatten_1_from_file(runtmp): @@ -3136,6 +3180,7 @@ def test_sig_downsample_1_scaled(c): test_mh = test_downsample_sig.minhash.downsample(scaled=10000) assert actual_downsample_sig.minhash == test_mh + assert actual_downsample_sig.name == test_downsample_sig.name @utils.in_tempdir