Skip to content

Commit

Permalink
[MRG] create ComputeParameters.from_manifest_row in support of `sourmash sketch fromfile` (#1886)
Browse files Browse the repository at this point in the history

* add ComputeParameters.from_manifest_row

* moar

* add tests for ComputeParameters.from_manifest_row
  • Loading branch information
ctb authored Mar 19, 2022
1 parent 8f4c94c commit dd5c013
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 8 deletions.
26 changes: 26 additions & 0 deletions src/sourmash/command_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,32 @@ def __init__(self, ksizes, seed, protein, dayhoff, hp, dna, num_hashes, track_ab
self.track_abundance = track_abundance
self.scaled = scaled

@classmethod
def from_manifest_row(cls, row):
    """Convert a CollectionManifest row into a ComputeParameters object.

    `row` is indexed by key and must provide 'moltype', 'ksize', 'num',
    'with_abundance' and 'scaled'.

    'moltype' must be one of 'DNA', 'protein', 'hp' or 'dayhoff'; the
    matching molecule-type flag is set and the other three are left False.

    Returns a new instance built with a single ksize and a seed of 42.

    Raises ValueError if 'moltype' is not one of the recognized values.
    """
    moltype = row['moltype']

    # Validate explicitly rather than with `assert`: assertions are removed
    # under `python -O`, which would let an unknown moltype fall through
    # with every molecule-type flag False.
    if moltype not in ('DNA', 'protein', 'hp', 'dayhoff'):
        raise ValueError(f"unknown moltype '{moltype}' in manifest row")

    is_dna = moltype == 'DNA'
    is_protein = moltype == 'protein'
    is_dayhoff = moltype == 'dayhoff'
    is_hp = moltype == 'hp'

    # Non-DNA rows get their ksize tripled.
    # NOTE(review): presumably the manifest stores amino-acid k while
    # ComputeParameters expects nucleotide k -- confirm.
    ksize = row['ksize']
    if not is_dna:
        ksize = ksize * 3

    # The seed is fixed at 42; no seed is read from the row.
    return cls([ksize], 42, is_protein, is_dayhoff, is_hp, is_dna,
               row['num'], row['with_abundance'], row['scaled'])


def to_param_str(self):
"Convert object to equivalent params str."
pi = []
Expand Down
10 changes: 2 additions & 8 deletions src/sourmash/command_sketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,8 @@ def _parse_params_str(params_str):
if len(item) < 6 or item[4] != '=':
raise ValueError("seed takes a parameter, e.g. 'seed=42'")
params['seed'] = int(item[5:])
elif item == 'protein':
moltype = 'protein'
elif item == 'dayhoff':
moltype = 'dayhoff'
elif item == 'hp':
moltype = 'hp'
elif item == 'dna':
moltype = 'dna'
elif item in ('protein', 'dayhoff', 'hp', 'dna'):
moltype = item
else:
raise ValueError(f"unknown component '{item}' in params string")

Expand Down
71 changes: 71 additions & 0 deletions tests/test_sourmash_sketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,77 @@ def test_compute_parameters_to_param_str(input_param_str, expected_output):
expected_output)


def test_manifest_row_to_compute_parameters_1():
    # ComputeParameters.from_manifest_row on a row whose moltype is 'DNA'
    manifest_row = {
        'moltype': 'DNA',
        'ksize': 21,
        'num': 0,
        'scaled': 1000,
        'with_abundance': 1,
    }
    params = ComputeParameters.from_manifest_row(manifest_row)

    # only the DNA flag should be set
    assert params.dna
    assert not (params.protein or params.dayhoff or params.hp)

    # sketch settings: the DNA ksize comes through as given in the row
    assert params.ksizes == [21]
    assert params.num_hashes == 0
    assert params.scaled == 1000
    assert params.track_abundance
    assert params.seed == 42


def test_manifest_row_to_compute_parameters_2():
    # ComputeParameters.from_manifest_row on a row whose moltype is 'protein'
    manifest_row = {
        'moltype': 'protein',
        'ksize': 10,
        'num': 0,
        'scaled': 200,
        'with_abundance': 1,
    }
    params = ComputeParameters.from_manifest_row(manifest_row)

    # only the protein flag should be set
    assert params.protein
    assert not (params.dna or params.dayhoff or params.hp)

    # sketch settings: a row ksize of 10 is expected to come back as 30
    assert params.ksizes == [30]
    assert params.num_hashes == 0
    assert params.scaled == 200
    assert params.track_abundance
    assert params.seed == 42


def test_manifest_row_to_compute_parameters_3():
    # ComputeParameters.from_manifest_row on a row whose moltype is 'dayhoff'
    manifest_row = {
        'moltype': 'dayhoff',
        'ksize': 12,
        'num': 0,
        'scaled': 200,
        'with_abundance': 0,
    }
    params = ComputeParameters.from_manifest_row(manifest_row)

    # only the dayhoff flag should be set
    assert params.dayhoff
    assert not (params.dna or params.protein or params.hp)

    # sketch settings: a row ksize of 12 is expected to come back as 36
    assert params.ksizes == [36]
    assert params.num_hashes == 0
    assert params.scaled == 200
    assert not params.track_abundance
    assert params.seed == 42


def test_manifest_row_to_compute_parameters_4():
    # ComputeParameters.from_manifest_row on a row whose moltype is 'hp'
    manifest_row = {
        'moltype': 'hp',
        'ksize': 32,
        'num': 0,
        'scaled': 200,
        'with_abundance': 0,
    }
    params = ComputeParameters.from_manifest_row(manifest_row)

    # only the hp flag should be set
    assert params.hp
    assert not (params.dna or params.protein or params.dayhoff)

    # sketch settings: a row ksize of 32 is expected to come back as 96
    assert params.ksizes == [96]
    assert params.num_hashes == 0
    assert params.scaled == 200
    assert not params.track_abundance
    assert params.seed == 42

### command line tests


Expand Down

0 comments on commit dd5c013

Please sign in to comment.