Skip to content

Commit

Permalink
MRG: add --set-name to sig intersect and sig subtract (#3162)
Browse files Browse the repository at this point in the history
**Note:** PR into #3161 

This PR cleans up and regularizes how `sig` subcommands name their output signatures.
Specifically, it:

- [x] adds `--set-name` to `sig intersect` and `sig subtract` to set
names on output signatures.
- [x] confirms and tests default names for output from `sig inflate`,
`sig filter`, `sig downsample`, and `sig flatten`
- [x] updates documentation appropriately
- [x] aliases `--name` to `--set-name` on `sig merge`, and nominates
`--name` for deprecation and removal in v5
- [x] nominates `--name` on `sig extract`, `sig filter`, and `sig
flatten` for deprecation and removal in v5
- [x] highlights `--name` on `sketch dna`, `sketch protein`, and `sketch
translate` - ref #3152

Fixes #1801
Fixes #3152

Related issues:
* #3173

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
ctb and pre-commit-ci[bot] committed Jun 4, 2024
1 parent 9283dc4 commit a133e68
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 14 deletions.
6 changes: 6 additions & 0 deletions doc/command-line.md
Original file line number Diff line number Diff line change
Expand Up @@ -1625,6 +1625,8 @@ k-mer sizes or molecule types present in any of the signature files,
you will need to choose one k-mer size with `-k/--ksize`, and/or one
moltype with `--dna/--protein/--hp/--dayhoff`.

Use `--set-name <name>` to set the name of the output sketch.

Note: `merge` only creates one output file, with one signature in it.

### `sourmash signature rename` - rename a signature
Expand Down Expand Up @@ -1661,6 +1663,8 @@ k-mer sizes or molecule types present in any of the signature files,
you will need to choose one k-mer size with `-k/--ksize`, and/or one
moltype with `--dna/--protein/--hp/--dayhoff`.

Use `--set-name <name>` to set the name of the output sketch.

Note: `subtract` only creates one output file, with one signature in it.

### `sourmash signature intersect` - intersect two (or more) signatures
Expand All @@ -1686,6 +1690,8 @@ k-mer sizes or molecule types present in any of the signature files,
you will need to choose one k-mer size with `-k/--ksize`, and/or one
moltype with `--dna/--protein/--hp/--dayhoff`.

Use `--set-name <name>` to set the name of the output sketch.

### `sourmash signature inflate` - transfer abundances from one signature to others

Use abundances from one signature to provide abundances on other signatures.
Expand Down
1 change: 1 addition & 0 deletions src/sourmash/cli/sig/intersect.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def subparser(subparsers):
metavar="FILE",
help="intersect with & take abundances from this signature",
)
subparser.add_argument("--set-name", help="set name for output signature")
subparser.add_argument(
"-f", "--force", action="store_true", help="try to load all files as signatures"
)
Expand Down
2 changes: 1 addition & 1 deletion src/sourmash/cli/sig/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def subparser(subparsers):
subparser.add_argument(
"--flatten", action="store_true", help="remove abundances from all signatures"
)
subparser.add_argument("--name", help="rename merged signature")
subparser.add_argument("--set-name", "--name", help="rename merged signature")
subparser.add_argument(
"-f", "--force", action="store_true", help="try to load all files as signatures"
)
Expand Down
1 change: 1 addition & 0 deletions src/sourmash/cli/sig/subtract.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def subparser(subparsers):
metavar="FILE",
help="intersect with & take abundances from this signature",
)
subparser.add_argument("--set-name", help="set name for output signature")
add_ksize_arg(subparser)
add_moltype_args(subparser)

Expand Down
7 changes: 5 additions & 2 deletions src/sourmash/cli/sketch/dna.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,15 @@ def subparser(subparsers):
"-o", "--output", help="output computed signatures to this file"
)
file_args.add_argument(
"--merge",
"--set-name",
"--name",
"--merge",
dest="merge",
type=str,
default="",
metavar="FILE",
help="merge all input files into one signature file with the " "specified name",
help="name the output sketch as specified; note, merges all input "
"files while sketching",
)
file_args.add_argument(
"--output-dir",
Expand Down
7 changes: 5 additions & 2 deletions src/sourmash/cli/sketch/protein.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,15 @@ def subparser(subparsers):
"--from-file", help="a text file containing a list of sequence files to load"
)
file_args.add_argument(
"--merge",
"--set-name",
"--name",
"--merge",
dest="merge",
type=str,
default="",
metavar="FILE",
help="merge all input files into one signature file with the " "specified name",
help="name the output sketch as specified; note, merges all input "
"files while sketching",
)
file_args.add_argument(
"--output-dir",
Expand Down
7 changes: 5 additions & 2 deletions src/sourmash/cli/sketch/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,15 @@ def subparser(subparsers):
"--from-file", help="a text file containing a list of sequence files to load"
)
file_args.add_argument(
"--merge",
"--set-name",
"--name",
"--merge",
dest="merge",
type=str,
default="",
metavar="FILE",
help="merge all input files into one signature file with the " "specified name",
help="name the output sketch as specified; note, merges all input "
"files while sketching",
)
file_args.add_argument(
"--output-dir",
Expand Down
7 changes: 4 additions & 3 deletions src/sourmash/sig/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ def merge(args):
error("no signatures to merge!?")
sys.exit(-1)

merged_sigobj = sourmash.SourmashSignature(mh, name=args.name)
merged_sigobj = sourmash.SourmashSignature(mh, name=args.set_name)

with sourmash_args.SaveSignaturesToLocation(args.output) as save_sigs:
save_sigs.add(merged_sigobj)
Expand Down Expand Up @@ -582,7 +582,8 @@ def intersect(args):

intersect_mh = intersect_mh.inflate(abund_sig.minhash)

intersect_sigobj = sourmash.SourmashSignature(intersect_mh)
intersect_sigobj = sourmash.SourmashSignature(intersect_mh, name=args.set_name)

with sourmash_args.SaveSignaturesToLocation(args.output) as save_sigs:
save_sigs.add(intersect_sigobj)

Expand Down Expand Up @@ -703,7 +704,7 @@ def subtract(args):

subtract_mh = subtract_mh.inflate(abund_sig.minhash)

subtract_sigobj = sourmash.SourmashSignature(subtract_mh)
subtract_sigobj = sourmash.SourmashSignature(subtract_mh, name=args.set_name)

with sourmash_args.SaveSignaturesToLocation(args.output) as save_sigs:
save_sigs.add(subtract_sigobj)
Expand Down
53 changes: 49 additions & 4 deletions tests/test_cmd_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,11 +332,13 @@ def test_sig_filter_1(c):

assert len(filtered_sigs) == 2

mh47 = load_one_signature_from_json(sig47).minhash
mh63 = load_one_signature_from_json(sig63).minhash
ss47 = load_one_signature_from_json(sig47)
ss63 = load_one_signature_from_json(sig63)

assert filtered_sigs[0].minhash == mh47
assert filtered_sigs[1].minhash == mh63
assert filtered_sigs[0].minhash == ss47.minhash
assert filtered_sigs[0].name == ss47.name
assert filtered_sigs[1].minhash == ss63.minhash
assert filtered_sigs[1].name == ss63.name


@utils.in_tempdir
Expand Down Expand Up @@ -473,6 +475,27 @@ def test_sig_intersect_1(runtmp):
assert actual_intersect_sig.minhash == test_intersect_sig.minhash


def test_sig_intersect_1_rename(runtmp):
    """Check that `sig intersect --set-name` names the output signature.

    Intersects the 47 and 63 test signatures, reads the resulting signature
    from stdout, and verifies both its minhash (against the stored
    47+63 intersection signature) and its name.
    """
    path_47 = utils.get_test_data("47.fa.sig")
    path_63 = utils.get_test_data("63.fa.sig")
    path_expected = utils.get_test_data("47+63-intersect.fa.sig")

    runtmp.run_sourmash(
        "sig", "intersect", path_47, path_63, "--set-name", "footest"
    )

    # the new signature is written to stdout
    stdout_text = runtmp.last_result.out

    expected_sig = load_one_signature_from_json(path_expected)
    observed_sig = load_one_signature_from_json(stdout_text)

    # debugging output, shown by pytest only when the test fails
    print(expected_sig.minhash)
    print(observed_sig.minhash)
    print(stdout_text)

    assert observed_sig.minhash == expected_sig.minhash
    assert observed_sig.name == "footest"


def test_sig_intersect_1_fromfile_picklist(runtmp):
c = runtmp

Expand Down Expand Up @@ -793,6 +816,26 @@ def test_sig_subtract_1(runtmp):
assert set(actual_subtract_sig.minhash.hashes.keys()) == set(mins)


def test_sig_subtract_1_name(runtmp):
    """Check that `sig subtract --set-name` names the output signature.

    Subtracts the 63 test signature from the 47 test signature, reads the
    resulting signature from stdout, and verifies both its hashes and its
    name.
    """
    path_47 = utils.get_test_data("47.fa.sig")
    path_63 = utils.get_test_data("63.fa.sig")

    runtmp.run_sourmash(
        "sig", "subtract", path_47, path_63, "--set-name", "footest"
    )

    # the new signature is written to stdout
    stdout_text = runtmp.last_result.out

    minuend_sig = load_one_signature_from_json(path_47)
    subtrahend_sig = load_one_signature_from_json(path_63)
    observed_sig = load_one_signature_from_json(stdout_text)

    # expected result: hashes in 47 that do not occur in 63
    expected_hashes = set(minuend_sig.minhash.hashes.keys()) - set(
        subtrahend_sig.minhash.hashes.keys()
    )

    assert set(observed_sig.minhash.hashes.keys()) == expected_hashes
    assert observed_sig.name == "footest"


def test_sig_subtract_1_sigzip(runtmp):
c = runtmp
# subtract of 63 from 47
Expand Down Expand Up @@ -3032,6 +3075,7 @@ def test_sig_flatten_1(runtmp):

test_flattened = load_one_signature_from_json(sig47)
assert test_flattened.minhash == siglist[0].minhash
assert test_flattened.name == siglist[0].name


def test_sig_flatten_1_from_file(runtmp):
Expand Down Expand Up @@ -3136,6 +3180,7 @@ def test_sig_downsample_1_scaled(c):
test_mh = test_downsample_sig.minhash.downsample(scaled=10000)

assert actual_downsample_sig.minhash == test_mh
assert actual_downsample_sig.name == test_downsample_sig.name


@utils.in_tempdir
Expand Down

0 comments on commit a133e68

Please sign in to comment.