sourmash-bio · ctb · Aug 12, 2020 · Aug 8, 2020 · Aug 8, 2020 · Aug 9, 2020
diff --git a/doc/api-example.md b/doc/api-example.md
@@ -162,8 +162,8 @@ First, load two signatures:
 Then, get the hashes, and (e.g.) compute the union:
 
 ```
->>> hashes1 = set(sig1.minhash.get_mins())
->>> hashes2 = set(sig2.minhash.get_mins())
+>>> hashes1 = set(sig1.minhash.hashes.keys())
+>>> hashes2 = set(sig2.minhash.hashes.keys())
 >>> hash_union = hashes1.union(hashes2)
 >>> print('{} hashes in union of {} and {}'.format(len(hash_union), len(hashes1), len(hashes2)))
 1000 hashes in union of 500 and 500
@@ -242,7 +242,7 @@ sections.
 MinHash objects have the following methods and attributes:
 
 * `ksize`, `num`, and `scaled` - the basic parameters used to create a MinHash object.
-* `get_mins()` - retrieve all of the hashes contained in this object.
+* `hashes` - a read-only, dictionary-like view of all of the hashes contained in this object; use `hashes.keys()` to get just the hash values.
 * `add_sequence(seq)` - hash sequence and add hash values.
 * `add(hash)` and `add_many(hashvals)` - add hash values directly.
 * `similarity(other)` - calculate Jaccard similarity with the other MinHash object.
@@ -279,7 +279,7 @@ We can downsample this to 500 by extracting the hashes and using
 `add_many` to add them to a new MinHash like so:
 
 ```
->>> hashvals = larger.get_mins()
+>>> hashvals = larger.hashes.keys()
 >>> smaller = sourmash.MinHash(n=500, ksize=31)
 >>> smaller.add_many(hashvals)
 >>> len(smaller)
@@ -304,7 +304,7 @@ The same can be done with scaled MinHashes:
 >>> len(large_scaled)
 459
 >>> small_scaled = sourmash.MinHash(n=0, ksize=31, scaled=500)
->>> small_scaled.add_many(large_scaled.get_mins())
+>>> small_scaled.add_many(large_scaled.hashes.keys())
 >>> len(small_scaled)
 69
 
@@ -341,7 +341,7 @@ your MinHash, and then extract the hash values:
 ```
 >>> num_mh = sourmash.MinHash(n=1000, ksize=31)
 >>> num_mh.add_sequence(sequence)
->>> hashvals = num_mh.get_mins()
+>>> hashvals = num_mh.hashes.keys()
 
 ```
 
@@ -359,7 +359,7 @@ The same works in reverse, of course:
 ```
 >>> scaled_mh = sourmash.MinHash(n=0, ksize=31, scaled=50)
 >>> scaled_mh.add_sequence(sequence)
->>> hashvals = scaled_mh.get_mins()
+>>> hashvals = scaled_mh.hashes.keys()
 >>> num_mh = sourmash.MinHash(n=500, ksize=31)
 >>> num_mh.add_many(hashvals)
 

diff --git a/sourmash/minhash.py b/sourmash/minhash.py
@@ -3,7 +3,7 @@
 
 import math
 import copy
-import collections
+from collections.abc import Mapping
 
 from . import VERSION
 from ._lowlevel import ffi, lib
@@ -82,7 +82,7 @@ def translate_codon(codon):
         raise ValueError(e.message)
 
 
-class _HashesWrapper(collections.Mapping):
+class _HashesWrapper(Mapping):
     "A read-only view of the hashes contained by a MinHash object."
     def __init__(self, h):
         self._data = h
@@ -214,7 +214,7 @@ def __getstate__(self):
             self.is_protein,
             self.dayhoff,
             self.hp,
-            self.get_mins(with_abundance=self.track_abundance),
+            self.hashes,
             None,
             self.track_abundance,
             self.max_hash,
@@ -290,39 +290,41 @@ def get_mins(self, with_abundance=False):
         """Return list of hashes or if ``with_abundance`` a list
         of (hash, abund).
         """
+        mins = self.hashes
+        if not with_abundance:
+            return mins.keys()
+        return mins
+
+
+    @deprecated(deprecated_in="3.5", removed_in="5.0",
+                current_version=VERSION,
+                details='Use .hashes property instead.')
+    def get_hashes(self):
+        "Return the hash values (the keys of the .hashes mapping)."
+        return self.hashes.keys()
+
+    @property
+    def hashes(self):
         size = ffi.new("uintptr_t *")
         mins_ptr = self._methodcall(lib.kmerminhash_get_mins, size)
         size = size[0]
 
         try:
-            if with_abundance and self.track_abundance:
+            if self.track_abundance:
                 size_abunds = ffi.new("uintptr_t *")
                 abunds_ptr = self._methodcall(lib.kmerminhash_get_abunds, size_abunds)
                 size_abunds = size_abunds[0]
                 assert size == size_abunds
                 result = dict(zip(ffi.unpack(mins_ptr, size), ffi.unpack(abunds_ptr, size)))
                 lib.kmerminhash_slice_free(abunds_ptr, size)
+                return _HashesWrapper(result)
             else:
-                result = ffi.unpack(mins_ptr, size)
+                d = ffi.unpack(mins_ptr, size)
+                return _HashesWrapper({ k : 1 for k in d })
+
         finally:
             lib.kmerminhash_slice_free(mins_ptr, size)
 
-        return result
-
-    @deprecated(deprecated_in="3.5", removed_in="5.0",
-                current_version=VERSION,
-                details='Use .hashes property instead.')
-    def get_hashes(self):
-        "Return the list of hashes."
-        return self.get_mins()
-
-    @property
-    def hashes(self):
-        if self.track_abundance:
-            return _HashesWrapper(self.get_mins(with_abundance=True))
-        else:
-            d = self.get_mins()
-            return _HashesWrapper({ k : 1 for k in d })
 
     @property
     def seed(self):
@@ -446,7 +448,7 @@ def downsample(self, num=None, scaled=None):
         )
         # copy over hashes:
         if self.track_abundance:
-            a.set_abundances(self.get_mins(with_abundance=True))
+            a.set_abundances(self.hashes)
         else:
             a.add_many(self)
 

diff --git a/sourmash/search.py b/sourmash/search.py
@@ -118,7 +118,7 @@ def gather_databases(query, databases, threshold_bp, ignore_abundance):
     # track original query information for later usage.
     track_abundance = query.minhash.track_abundance and not ignore_abundance
     orig_query_mh = query.minhash
-    orig_query_mins = orig_query_mh.get_hashes()
+    orig_query_mins = orig_query_mh.hashes.keys()
 
     # do we pay attention to abundances?
     orig_query_abunds = { k: 1 for k in orig_query_mins }
@@ -137,8 +137,8 @@ def gather_databases(query, databases, threshold_bp, ignore_abundance):
             break
 
         # subtract found hashes from search hashes, construct new search
-        query_mins = set(query.minhash.get_hashes())
-        found_mins = best_match.minhash.get_hashes()
+        query_mins = set(query.minhash.hashes.keys())
+        found_mins = best_match.minhash.hashes.keys()
 
         # Is the best match computed with scaled? Die if not.
         match_scaled = best_match.minhash.scaled

diff --git a/tests/test__minhash.py b/tests/test__minhash.py
@@ -1459,12 +1459,13 @@ def test_get_mins_deprecated(track_abundance):
     mh.add_many(mins)
     mh.add_many(mins)
 
-    assert set(mh.get_mins()) == set(mins)
-    if track_abundance:
-        d = mh.get_mins(with_abundance=True)
-        for k in mins:
-            assert d[k] == 4
-        assert len(d) == len(mins)
+    with pytest.warns(DeprecationWarning):
+        assert set(mh.get_mins()) == set(mins)
+        if track_abundance:
+            d = mh.get_mins(with_abundance=True)
+            for k in mins:
+                assert d[k] == 4
+            assert len(d) == len(mins)
 
 
 def test_get_hashes_deprecated(track_abundance):
@@ -1476,7 +1477,8 @@ def test_get_hashes_deprecated(track_abundance):
     mh.add_many(mins)
     mh.add_many(mins)
 
-    assert set(mh.get_hashes()) == set(mins)
+    with pytest.warns(DeprecationWarning):
+        assert set(mh.get_hashes()) == set(mins)
 
 
 def test_downsample_num(track_abundance):

diff --git a/tests/test__minhash_hypothesis.py b/tests/test__minhash_hypothesis.py
@@ -17,7 +17,7 @@ def test_set_abundance_num_hypothesis(hashes, abundances, sketch_size):
 
     a.set_abundances(oracle)
 
-    mins = a.get_mins(with_abundance=True)
+    mins = a.hashes
     size = min(sum(1 for v in oracle.values() if v > 0), sketch_size)
     assert len(mins) == size
 
@@ -38,7 +38,7 @@ def test_set_abundance_scaled_hypothesis(hashes, abundances, scaled):
     max_hash = _get_max_hash_for_scaled(scaled)
     below_max_hash = sum(1 for (k, v) in oracle.items() if k <= max_hash and v > 0)
 
-    mins = a.get_mins(with_abundance=True)
+    mins = a.hashes
     assert len(mins) == below_max_hash
 
     for k, v in mins.items():

diff --git a/tests/test_cmd_signature.py b/tests/test_cmd_signature.py
@@ -209,11 +209,11 @@ def test_sig_filter_2(c):
     filtered_sig = sourmash.load_one_signature(out)
     test_sig = sourmash.load_one_signature(sig47)
 
-    abunds = test_sig.minhash.get_mins(True)
+    abunds = test_sig.minhash.hashes
     abunds = { k: v for (k, v) in abunds.items() if v >= 2 and v <= 5 }
     assert abunds
 
-    assert filtered_sig.minhash.get_mins(True) == abunds
+    assert filtered_sig.minhash.hashes == abunds
 
 
 @utils.in_tempdir
@@ -228,11 +228,11 @@ def test_sig_filter_3(c):
     filtered_sig = sourmash.load_one_signature(out)
     test_sig = sourmash.load_one_signature(sig47)
 
-    abunds = test_sig.minhash.get_mins(True)
+    abunds = test_sig.minhash.hashes
     abunds = { k: v for (k, v) in abunds.items() if v >= 2 }
     assert abunds
 
-    assert filtered_sig.minhash.get_mins(True) == abunds
+    assert filtered_sig.minhash.hashes == abunds
 
 
 @utils.in_tempdir
@@ -247,11 +247,11 @@ def test_sig_filter_3_ksize_select(c):
     filtered_sig = sourmash.load_one_signature(out)
     test_sig = sourmash.load_one_signature(psw_mag, ksize=31)
 
-    abunds = test_sig.minhash.get_mins(True)
+    abunds = test_sig.minhash.hashes
     abunds = { k: v for (k, v) in abunds.items() if v >= 2 }
     assert abunds
 
-    assert filtered_sig.minhash.get_mins(True) == abunds
+    assert filtered_sig.minhash.hashes == abunds
 
 
 @utils.in_tempdir
@@ -356,8 +356,8 @@ def test_sig_intersect_3(c):
     # actually do an intersection ourselves for the test
     mh47 = sourmash.load_one_signature(sig47).minhash
     mh63 = sourmash.load_one_signature(sig63).minhash
-    mh47_abunds = mh47.get_mins(with_abundance=True)
-    mh63_mins = set(mh63.get_mins())
+    mh47_abunds = mh47.hashes
+    mh63_mins = set(mh63.hashes.keys())
 
     # get the set of mins that are in common
     mh63_mins.intersection_update(mh47_abunds)
@@ -388,8 +388,8 @@ def test_sig_intersect_4(c):
     # actually do an intersection ourselves for the test
     mh47 = sourmash.load_one_signature(sig47).minhash
     mh63 = sourmash.load_one_signature(sig63).minhash
-    mh47_abunds = mh47.get_mins(with_abundance=True)
-    mh63_mins = set(mh63.get_mins())
+    mh47_abunds = mh47.hashes
+    mh63_mins = set(mh63.hashes.keys())
 
     # get the set of mins that are in common
     mh63_mins.intersection_update(mh47_abunds)
@@ -486,10 +486,10 @@ def test_sig_subtract_1(c):
     test2_sig = sourmash.load_one_signature(sig63)
     actual_subtract_sig = sourmash.load_one_signature(out)
 
-    mins = set(test1_sig.minhash.get_mins())
-    mins -= set(test2_sig.minhash.get_mins())
+    mins = set(test1_sig.minhash.hashes.keys())
+    mins -= set(test2_sig.minhash.hashes.keys())
 
-    assert set(actual_subtract_sig.minhash.get_mins()) == set(mins)
+    assert set(actual_subtract_sig.minhash.hashes.keys()) == set(mins)
 
 
 @utils.in_tempdir
@@ -504,7 +504,7 @@ def test_sig_subtract_1_multisig(c):
 
     actual_subtract_sig = sourmash.load_one_signature(out)
 
-    assert not set(actual_subtract_sig.minhash.get_mins())
+    assert not set(actual_subtract_sig.minhash.hashes.keys())
 
 
 @utils.in_tempdir
@@ -1067,12 +1067,12 @@ def test_sig_downsample_1_scaled_to_num(c):
     out = c.last_result.out
 
     actual_downsample_sig = sourmash.load_one_signature(out)
-    actual_mins = actual_downsample_sig.minhash.get_mins()
+    actual_mins = actual_downsample_sig.minhash.hashes.keys()
     actual_mins = list(actual_mins)
     actual_mins.sort()
 
     test_downsample_sig = sourmash.load_one_signature(sig47)
-    test_mins = test_downsample_sig.minhash.get_mins()
+    test_mins = test_downsample_sig.minhash.hashes.keys()
     test_mins = list(test_mins)
     test_mins.sort()
     test_mins = test_mins[:500]           # take 500 smallest
@@ -1130,8 +1130,8 @@ def test_sig_downsample_2_num_to_scaled(c):
                                                       select_moltype='DNA')
     actual_downsample_sig = sourmash.load_one_signature(out)
 
-    test_mins = test_downsample_sig.minhash.get_mins()
-    actual_mins = actual_downsample_sig.minhash.get_mins()
+    test_mins = test_downsample_sig.minhash.hashes.keys()
+    actual_mins = actual_downsample_sig.minhash.hashes.keys()
 
     # select those mins that are beneath the new max hash...
     max_hash = actual_downsample_sig.minhash.max_hash

diff --git a/tests/test_index.py b/tests/test_index.py
@@ -234,7 +234,7 @@ def test_linear_gather_threshold_1():
     # now construct query signatures with specific numbers of hashes --
     # note, these signatures all have scaled=1000.
 
-    mins = list(sorted(sig2.minhash.get_mins()))
+    mins = list(sorted(sig2.minhash.hashes.keys()))
     new_mh = sig2.minhash.copy_and_clear()
 
     # query with empty hashes
@@ -289,7 +289,7 @@ def test_linear_gather_threshold_5():
     # now construct query signatures with specific numbers of hashes --
     # note, these signatures all have scaled=1000.
 
-    mins = list(sorted(sig2.minhash.get_mins()))
+    mins = list(sorted(sig2.minhash.hashes.keys()))
     new_mh = sig2.minhash.copy_and_clear()
 
     # add five hashes

diff --git a/tests/test_jaccard.py b/tests/test_jaccard.py
@@ -76,10 +76,10 @@ def test_dna_mh(track_abundance):
     for i in range(len(seq) - 3):
         e2.add_kmer(seq[i:i + 4])
 
-    assert e1.get_mins() == e2.get_mins()
-    print(e1.get_mins())
-    assert 726311917625663847 in e1.get_mins()
-    assert 3697418565283905118 in e1.get_mins()
+    assert e1.hashes.keys() == e2.hashes.keys()
+    print(e1.hashes.keys())
+    assert 726311917625663847 in e1.hashes.keys()
+    assert 3697418565283905118 in e1.hashes.keys()
 
 
 def test_protein_mh(track_abundance):
@@ -95,8 +95,8 @@ def test_protein_mh(track_abundance):
         kmer = seq[i:i + 6]
         e2.add_kmer(kmer)
 
-    assert e1.get_mins() == e2.get_mins()
-    assert 901193879228338100 in e1.get_mins()
+    assert e1.hashes.keys() == e2.hashes.keys()
+    assert 901193879228338100 in e1.hashes.keys()
 
 
 def test_pickle(track_abundance):
@@ -116,8 +116,7 @@ def test_pickle(track_abundance):
     fp2 = BytesIO(fp.getvalue())
     e2 = pickle.load(fp2)
 
-    assert e1.get_mins(with_abundance=track_abundance) == \
-           e2.get_mins(with_abundance=track_abundance)
+    assert e1.hashes == e2.hashes
     assert e1.num == e2.num
     assert e1.ksize == e2.ksize
     assert e1.is_protein == e2.is_protein