From 30870c0a5cef4fbbbd7080645c0005029d4b32d6 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 4 Dec 2018 17:15:54 -0800 Subject: [PATCH 1/2] fix divide by zero issue in contained_by --- sourmash/_minhash.pyx | 2 ++ tests/test__minhash.py | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/sourmash/_minhash.pyx b/sourmash/_minhash.pyx index b4aeb5ba48..9a83ff8dac 100644 --- a/sourmash/_minhash.pyx +++ b/sourmash/_minhash.pyx @@ -364,6 +364,8 @@ cdef class MinHash(object): """\ Calculate how much of self is contained by other. """ + if not self.get_mins(): + return 0.0 return self.count_common(other) / len(self.get_mins()) def similarity_ignore_maxhash(self, MinHash other): diff --git a/tests/test__minhash.py b/tests/test__minhash.py index d8592eff8c..00ac5a6e91 100644 --- a/tests/test__minhash.py +++ b/tests/test__minhash.py @@ -70,6 +70,26 @@ def test_basic_dna(track_abundance): assert len(b) == 1 +def test_div_zero(track_abundance): + # verify that empty MHs do not yield divide by zero errors for similarity + mh = MinHash(1, 4, track_abundance=track_abundance) + mh2 = mh.copy_and_clear() + + mh.add_sequence('ATGC') + assert mh.similarity(mh2) == 0 + assert mh2.similarity(mh) == 0 + + +def test_div_zero_contained(track_abundance): + # verify that empty MHs do not yield divide by zero errors for contained_by + mh = MinHash(1, 4, track_abundance=track_abundance) + mh2 = mh.copy_and_clear() + + mh.add_sequence('ATGC') + assert mh.contained_by(mh2) == 0 + assert mh2.contained_by(mh) == 0 + + def test_bytes_dna(track_abundance): mh = MinHash(1, 4, track_abundance=track_abundance) mh.add_sequence('ATGC') From 93b83436206b026ad62fba6f4b8085f61b9d1e13 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Tue, 4 Dec 2018 17:25:26 -0800 Subject: [PATCH 2/2] use len(self) for the empty check in contained_by, per luiz --- sourmash/_minhash.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sourmash/_minhash.pyx b/sourmash/_minhash.pyx index 9a83ff8dac..5df822c0c9 100644 --- a/sourmash/_minhash.pyx +++ b/sourmash/_minhash.pyx @@ -364,7 +364,7 @@ cdef class MinHash(object): """\ Calculate how much of self is contained by other. """ - if not self.get_mins(): + if not len(self): return 0.0 return self.count_common(other) / len(self.get_mins())