diff --git a/.coveragerc b/.coveragerc
index d284e1014c..09b80d8275 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -6,6 +6,5 @@ omit =
     doc/conf.py
     setup.py
     tests/*
-    third-party/smhasher/MurmurHash3.cc
     .tox/*
     benchmarks/*
diff --git a/.gitignore b/.gitignore
index 0aa0e7a6f1..0b797c7bf5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,14 +13,9 @@ dist
 build
 sourmash.egg-info
 .ipynb_checkpoints
-_minhash.so
 .cache
 *.so
 .coverage
-sourmash_lib/_minhash.cpp
-sourmash/_minhash.cpp
-.asv/
-.eggs/
 .pytest_cache
 .python-version
 sourmash/version.py
@@ -30,6 +25,9 @@ sourmash/_lowlevel*.py
 .env
 Pipfile
 Pipfile.lock
-ocf/target/
 target/
 Cargo.lock
+.eggs
+.asv
+pkg/
+wasm-pack.log
diff --git a/.travis.yml b/.travis.yml
index 937b4b6c0b..7483db938e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,16 +7,18 @@ cache:
     - "$HOME/.cache/pip"
     - "$HOME/.cargo"
     - "target"
-    - ".tox"
 
 branches:
   only:
   - master
   - "/^v.*$/"
 
-script: tox
+script: tox -vv
 
-install: pip install tox-travis
+install:
+  - source .travis/install_cargo.sh
+
+before_script: pip install tox-travis
 
 jobs:
   allow_failures:
@@ -28,12 +30,14 @@ jobs:
 
     - &test
       stage: test
-      python: 2.7
+      python: 3.6
     - <<: *test
       os: osx
+      osx_image: xcode10.1
+      python: 3.7
       language: generic
       env:
-        - TOXENV=py36
+        - TOXENV=py37
     - <<: *test
       python: 3.7
       name: integration (ipfs/redis)
@@ -45,7 +49,7 @@ jobs:
         - redis-server
         - docker
     - <<: *test
-      python: 3.6
+      python: 2.7
     - <<: *test
       python: 3.5
 
@@ -55,12 +59,16 @@ jobs:
       services:
         - docker
       env:
-        - PIP=pip
+        - CIBW_BUILD='cp37-*'
         - CIBW_SKIP='*-manylinux_i686'
-      install: skip
+        - CIBW_BEFORE_BUILD='source .travis/install_cargo.sh'
+        - CIBW_ENVIRONMENT='PATH="$HOME/.cargo/bin:$PATH"'
+        - CIBW_ENVIRONMENT_MACOS='MACOSX_DEPLOYMENT_TARGET=10.11'
+      before_script: skip
       script:
-        - sudo $PIP install cibuildwheel==1.0.0
-        - cibuildwheel --output-dir wheelhouse
+        - python -m pip install -U pip setuptools
+        - python -m pip install cibuildwheel==1.1.0
+        - python -m cibuildwheel --output-dir wheelhouse
       deploy:
         provider: releases
         api_key:
@@ -73,12 +81,7 @@ jobs:
     - <<: *wheel
       os: osx
       osx_image: xcode10.1
-      language: generic
-      before_script:
-        - sudo $PIP install -U pip setuptools
-      env:
-        - PIP=pip2
-        - CIBW_ENVIRONMENT_MACOS='MACOSX_DEPLOYMENT_TARGET=10.11'
+      language: shell
 
 stages:
   - check
diff --git a/MANIFEST.in b/MANIFEST.in
index 69df9b2280..1d206dc896 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,10 +1,16 @@
 include LICENSE Makefile Dockerfile LICENSE Makefile README.md requirements.txt
 include index.ipynb
+include sourmash VERSION
 recursive-include sourmash_lib *
 recursive-include sourmash *
-recursive-include third-party *.cc *.h
-exclude tests/*
+recursive-include src *.rs
+recursive-include benches *.rs
+include Cargo.toml
+include include/sourmash.h
+prune .eggs
+global-exclude *.rlib
 global-exclude *.orig
 global-exclude *.pyc
 global-exclude *.so
 prune tests/test-data/
+global-exclude *.git/
diff --git a/Makefile b/Makefile
index 8bb87d29dd..559ad81cbc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,13 @@
 PYTHON ?= python
 
-all:
-	$(PYTHON) setup.py build_ext -i
+all: build
 
 .PHONY:
 
+build:
+	$(PYTHON) setup.py build_ext -i
+	cargo build
+
 clean:
 	$(PYTHON) setup.py clean --all
 	rm -f sourmash/*.so
@@ -19,6 +22,7 @@ dist: FORCE
 test: all
 	pip install -e '.[test]'
 	$(PYTHON) -m pytest
+	cargo test
 
 doc: .PHONY
 	cd doc && make html
@@ -29,12 +33,12 @@ include/sourmash.h: src/lib.rs src/ffi/minhash.rs src/ffi/signature.rs src/ffi/n
 	rustup override set stable
 
 coverage: all
-	$(PYTHON) setup.py clean --all
-	SOURMASH_COVERAGE=1 $(PYTHON) setup.py build_ext -i
+	$(PYTHON) setup.py build_ext -i
 	$(PYTHON) -m pytest --cov=. --cov-report term-missing
 
 benchmark:
 	asv continuous master `git rev-parse HEAD`
+	cargo bench
 
 check:
 	cargo build
diff --git a/README.md b/README.md
index e2d6619cb3..b1297dade1 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,19 @@
+<meta charset="utf-8"/>
+
 # sourmash
 
 [![Documentation](https://readthedocs.org/projects/sourmash/badge/?version=latest)](http://sourmash.readthedocs.io/en/latest/)
 [![Build Status](https://travis-ci.com/dib-lab/sourmash.svg?branch=master)](https://travis-ci.com/dib-lab/sourmash)
+<a href="https://pypi.org/project/sourmash/"><img alt="PyPI" src="https://badge.fury.io/py/sourmash.svg"></a>
 [![codecov](https://codecov.io/gh/dib-lab/sourmash/branch/master/graph/badge.svg)](https://codecov.io/gh/dib-lab/sourmash)
 [![DOI](http://joss.theoj.org/papers/10.21105/joss.00027/status.svg)](http://joss.theoj.org/papers/10.21105/joss.00027)
+<a href="https://github.com/dib-lab/sourmash/blob/master/LICENSE"><img alt="License: 3-Clause BSD" src="https://img.shields.io/badge/License-BSD%203--Clause-blue.svg"></a>
+
+🦀
+[![](http://meritbadge.herokuapp.com/sourmash)](https://crates.io/crates/sourmash)
+[![Rust API Documentation on docs.rs](https://docs.rs/sourmash/badge.svg)](https://docs.rs/sourmash)
+
+---
 
 Compute MinHash signatures for nucleotide (DNA/RNA) and protein sequences.
 
@@ -13,7 +23,7 @@ Usage:
     sourmash compare *.sig -o distances
     sourmash plot distances
 
-Sourmash 1.0 is [published on JOSS](https://doi.org/10.21105/joss.00027); please cite that paper if you use sourmash (`doi: 10.21105/joss.00027`):.
+sourmash 1.0 is [published on JOSS](https://doi.org/10.21105/joss.00027); please cite that paper if you use sourmash (`doi: 10.21105/joss.00027`):.
 
 ----
 
@@ -48,9 +58,10 @@ A quickstart tutorial [is available](https://sourmash.readthedocs.io/en/latest/t
 ### Requirements
 
 sourmash runs under both Python 2.7.x and Python 3.5+.  The base
-requirements are screed and ijson, together with a C++ development
-environment and the CPython development headers and libraries (for the
-C++ extension).
+requirements are screed and ijson, together with a Rust environment (for the
+extension code). We suggest using `rustup` to install the Rust environment:
+
+    curl https://sh.rustup.rs -sSf | sh
 
 The comparison code (`sourmash compare`) uses numpy, and the plotting
 code uses matplotlib and scipy, but most of the code is usable without
diff --git a/doc/developer.md b/doc/developer.md
index cd4ab13847..79f2411e9f 100644
--- a/doc/developer.md
+++ b/doc/developer.md
@@ -7,7 +7,13 @@ You can get the latest development master branch with:
 ```
 git clone https://github.com/dib-lab/sourmash.git
 ```
-To install all of the necessary dependencies, do:
+sourmash runs under both Python 2.7.x and Python 3.5+.  The base
+requirements are screed and ijson, together with a Rust environment (for the
+extension code). We suggest using `rustup` to install the Rust environment:
+
+    curl https://sh.rustup.rs -sSf | sh
+
+To install all of the necessary Python dependencies, do:
 ```
 pip install -r requirements.txt
 ```
@@ -25,13 +31,6 @@ pip install -e .
 
 We use [Travis][0] for continuous integration.
 
-Code coverage calculation is enabled (on Linux only) by running
-`make coverage`.  This recompiles the C++ extension without
-optimization and with coverage configured.  See `setup.py` for
-more information on this; the environment variable
-`SOURMASH_COVERAGE` controls whether the C++ extension is
-compiled with code coverage analysis enabled.
-
 Code coverage can be viewed interactively at [codecov.io][1].
 
 [0]:https://travis-ci.org/dib-lab/sourmash
diff --git a/netlify.toml b/netlify.toml
new file mode 100644
index 0000000000..994c94d665
--- /dev/null
+++ b/netlify.toml
@@ -0,0 +1,12 @@
+# Configuration for pull request documentation previews via Netlify
+
+[build]
+publish = "_build/html"
+base = "doc"
+command = '''
+    cd .. && \
+    curl https://sh.rustup.rs -sSf | sh -s -- -y && \
+    source $HOME/.cargo/env && \
+    pip install -e .[doc] && \
+    make doc
+'''
diff --git a/setup.py b/setup.py
index 1f1720821d..2850339eda 100644
--- a/setup.py
+++ b/setup.py
@@ -1,11 +1,32 @@
 from __future__ import print_function
-import sys
-from setuptools import setup, find_packages
-from setuptools import Extension
 import os
+from setuptools import setup, find_packages
+import sys
+
+
+DEBUG_BUILD = os.environ.get("SOURMASH_DEBUG") == "1"
+
+
+def build_native(spec):
+    cmd = ["cargo", "build", "--lib"]
+
+    target = "debug"
+    if not DEBUG_BUILD:
+        cmd.append("--release")
+        target = "release"
+
+    build = spec.add_external_build(cmd=cmd, path=".")
+
+    rtld_flags = ["NOW"]
+    if sys.platform == "darwin":
+        rtld_flags.append("NODELETE")
+    spec.add_cffi_module(
+        module_path="sourmash._lowlevel",
+        dylib=lambda: build.find_dylib("sourmash", in_path="target/%s" % target),
+        header_filename=lambda: build.find_header("sourmash.h", in_path="include"),
+        rtld_flags=rtld_flags,
+    )
 
-EXTRA_COMPILE_ARGS = ['-std=c++11', '-pedantic']
-EXTRA_LINK_ARGS=[]
 
 CLASSIFIERS = [
     "Environment :: Console",
@@ -15,7 +36,7 @@
     "Natural Language :: English",
     "Operating System :: POSIX :: Linux",
     "Operating System :: MacOS :: MacOS X",
-    "Programming Language :: C++",
+    "Programming Language :: Rust",
     "Programming Language :: Python :: 2.7",
     "Programming Language :: Python :: 3.5",
     "Programming Language :: Python :: 3.6",
@@ -24,24 +45,10 @@
 
 CLASSIFIERS.append("Development Status :: 5 - Production/Stable")
 
-if sys.platform == 'darwin':              # Mac OS X?
-    # force 64bit only builds
-    EXTRA_COMPILE_ARGS.extend(['-arch', 'x86_64', '-mmacosx-version-min=10.7',
-                               '-stdlib=libc++'])
-
-else:                                     # ...likely Linux
-   if os.environ.get('SOURMASH_COVERAGE'):
-      print('Turning on coverage analysis.')
-      EXTRA_COMPILE_ARGS.extend(['-g', '--coverage', '-lgcov'])
-      EXTRA_LINK_ARGS.extend(['--coverage', '-lgcov'])
-   else:
-      EXTRA_COMPILE_ARGS.append('-O3')
-
-with open('README.md', 'r') as readme:
+with open("README.md", "r") as readme:
     LONG_DESCRIPTION = readme.read()
 
-SETUP_METADATA = \
-               {
+SETUP_METADATA = {
     "name": "sourmash",
     "description": "tools for comparing DNA sequences with MinHash sketches",
     "long_description": LONG_DESCRIPTION,
@@ -55,20 +62,18 @@
         'sourmash = sourmash.__main__:main'
         ]
     },
-    "ext_modules": [Extension("sourmash._minhash",
-                               sources=["sourmash/_minhash.pyx",
-                                        "third-party/smhasher/MurmurHash3.cc"],
-                               depends=["sourmash/kmer_min_hash.hh"],
-                               include_dirs=["./sourmash",
-                                             "./third-party/smhasher/"],
-                               language="c++",
-                               extra_compile_args=EXTRA_COMPILE_ARGS,
-                               extra_link_args=EXTRA_LINK_ARGS)],
     "install_requires": ["screed>=0.9", "ijson>=2.5.1", "khmer>=2.1", 'numpy',
+                         "cffi",
                          'matplotlib', 'scipy', "deprecation>=2.0.6"],
-    "setup_requires": ['Cython>=0.25.2', "setuptools>=38.6.0",
-                       'setuptools_scm', 'setuptools_scm_git_archive'],
+    "setup_requires": [
+        "setuptools>=38.6.0",
+        "milksnake",
+        "setuptools_scm",
+        "setuptools_scm_git_archive",
+    ],
     "use_scm_version": {"write_to": "sourmash/version.py"},
+    "zip_safe": False,
+    "platforms": "any",
     "extras_require": {
         'test' : ['pytest', 'pytest-cov'],
         'demo' : ['jupyter', 'jupyter_client', 'ipython'],
@@ -76,13 +81,10 @@
                  "sphinxcontrib-napoleon", "nbsphinx"],
         '10x': ['bam2fasta==1.0.1'],
         'storage': ["ipfshttpclient", "redis"]
-        },
-    "include_package_data": True,
-    "package_data": {
-        "sourmash": ['*.pxd']
     },
-    "classifiers": CLASSIFIERS
-    }
+    "include_package_data": True,
+    "classifiers": CLASSIFIERS,
+    "milksnake_tasks": [build_native],
+}
 
 setup(**SETUP_METADATA)
-
diff --git a/sourmash/__init__.py b/sourmash/__init__.py
index 2893023d2a..319ccd1e75 100644
--- a/sourmash/__init__.py
+++ b/sourmash/__init__.py
@@ -7,12 +7,21 @@
 import math
 import os
 
-from ._minhash import (MinHash, get_minhash_default_seed, get_minhash_max_hash)
+from ._lowlevel import ffi, lib
+
+ffi.init_once(lib.sourmash_init, "init")
+
+from ._minhash import MinHash, get_minhash_default_seed, get_minhash_max_hash
+
 DEFAULT_SEED = get_minhash_default_seed()
 MAX_HASH = get_minhash_max_hash()
 
-from .signature import (load_signatures, load_one_signature, SourmashSignature,
-                        save_signatures)
+from .signature import (
+    load_signatures,
+    load_one_signature,
+    SourmashSignature,
+    save_signatures,
+)
 from .sbtmh import load_sbt_index, search_sbt_index, create_sbt_index
 from . import lca
 from . import sbt
@@ -21,6 +30,7 @@
 from . import signature
 
 from pkg_resources import get_distribution, DistributionNotFound
+
 try:
     VERSION = get_distribution(__name__).version
 except DistributionNotFound:  # pragma: no cover
diff --git a/sourmash/_compat.py b/sourmash/_compat.py
new file mode 100644
index 0000000000..86b4e97f98
--- /dev/null
+++ b/sourmash/_compat.py
@@ -0,0 +1,24 @@
+import sys
+
+
+PY2 = sys.version_info[0] == 2
+
+if PY2:
+    text_type = unicode
+    int_types = (int, long)
+    string_types = (str, unicode)
+    range_type = xrange
+    itervalues = lambda x: x.itervalues()
+    NUL = '\x00'
+    def implements_to_string(cls):
+        cls.__unicode__ = cls.__str__
+        cls.__str__ = lambda x: x.__unicode__().encode('utf-8')
+        return cls
+else:
+    text_type = str
+    int_types = (int,)
+    string_types = (str,)
+    range_type = range
+    itervalues = lambda x: x.values()
+    NUL = 0
+    implements_to_string = lambda x: x
diff --git a/sourmash/_minhash.pxd b/sourmash/_minhash.pxd
deleted file mode 100644
index a5aa1f4e04..0000000000
--- a/sourmash/_minhash.pxd
+++ /dev/null
@@ -1,70 +0,0 @@
-# -*- coding: UTF-8 -*-
-# cython: language_level=3, c_string_type=str, c_string_encoding=ascii
-
-from __future__ import unicode_literals
-
-from libcpp cimport bool
-from libcpp.map cimport map
-from libcpp.memory cimport unique_ptr
-from libcpp.set cimport set as cppset
-from libcpp.string cimport string
-from libc.stdint cimport uint32_t, uint64_t
-from libcpp.vector cimport vector
-
-
-cdef extern from "kmer_min_hash.hh":
-    ctypedef uint64_t HashIntoType;
-    ctypedef vector[HashIntoType] CMinHashType;
-
-
-    cdef uint64_t _hash_murmur(const string, uint32_t seed)
-    cdef uint64_t _hash_murmur(const char *, unsigned int, uint32_t)
-
-    cdef cppclass KmerMinHash:
-        const uint32_t seed;
-        const unsigned int num;
-        const unsigned int ksize;
-        const bool is_protein;
-        const bool dayhoff;
-        const bool hp;
-        const HashIntoType max_hash;
-        CMinHashType mins;
-
-        KmerMinHash(unsigned int, unsigned int, bool, bool, bool, uint32_t, HashIntoType)
-        void add_hash(HashIntoType) except +ValueError
-        void remove_hash(HashIntoType) except +ValueError
-        void add_word(const string& word) except +ValueError
-        void add_word(const char * word) except +ValueError
-        void add_sequence(const string&, bool) except +ValueError
-        void merge(const KmerMinHash&) except +ValueError
-        string aa_to_dayhoff(string aa) except +ValueError
-        string aa_to_hp(string aa) except +ValueError
-        string translate_codon(string codon) except +ValueError
-        unsigned int count_common(const KmerMinHash&) except +ValueError
-        unsigned long size()
-
-
-    cdef cppclass KmerMinAbundance(KmerMinHash):
-        CMinHashType abunds;
-
-        KmerMinAbundance(unsigned int, unsigned int, bool, bool, bool, uint32_t, HashIntoType)
-        void add_hash(HashIntoType) except +ValueError
-        void remove_hash(HashIntoType) except +ValueError
-        void add_word(string word) except +ValueError
-        void add_word(const char * word) except +ValueError
-        void add_sequence(const string&, bool) except +ValueError
-        void merge(const KmerMinAbundance&) except +ValueError
-        void merge(const KmerMinHash&) except +ValueError
-        string aa_to_dayhoff(string aa) except +ValueError
-        string aa_to_hp(string aa) except +ValueError
-        string translate_codon(string codon) except +ValueError
-        unsigned int count_common(const KmerMinAbundance&) except +ValueError
-        unsigned long size()
-
-
-cdef class MinHash(object):
-    cdef unique_ptr[KmerMinHash] _this
-    cdef bool _track_abundance
-
-    cpdef get_mins(self, bool with_abundance=*)
-    cpdef set_abundances(self, dict)
diff --git a/sourmash/_minhash.py b/sourmash/_minhash.py
new file mode 100644
index 0000000000..2557fd218b
--- /dev/null
+++ b/sourmash/_minhash.py
@@ -0,0 +1,527 @@
+# -*- coding: UTF-8 -*-
+from __future__ import unicode_literals, division
+
+import math
+import copy
+
+from ._compat import string_types, range_type
+from ._lowlevel import ffi, lib
+from .utils import RustObject, rustcall, decode_str
+from .exceptions import SourmashError
+
+# default MurmurHash seed
+MINHASH_DEFAULT_SEED = 42
+
+
+def get_minhash_default_seed():
+    return MINHASH_DEFAULT_SEED
+
+
+# we use the 64-bit hash space of MurmurHash only
+# this is 2 ** 64 - 1 in hexadecimal
+MINHASH_MAX_HASH = 0xFFFFFFFFFFFFFFFF
+
+
+def get_minhash_max_hash():
+    return MINHASH_MAX_HASH
+
+
+def get_max_hash_for_scaled(scaled):
+    if scaled == 0:
+        return 0
+    elif scaled == 1:
+        return get_minhash_max_hash()
+
+    return int(round(get_minhash_max_hash() / scaled, 0))
+
+
+def get_scaled_for_max_hash(max_hash):
+    if max_hash == 0:
+        return 0
+    return int(round(get_minhash_max_hash() / max_hash, 0))
+
+
+def to_bytes(s):
+    # Allow for strings, bytes or int
+    # Single item of byte string = int
+
+    if isinstance(s, bytes):
+        return s
+
+    if not isinstance(s, string_types + (bytes, int)):
+        raise TypeError("Requires a string-like sequence")
+
+    if isinstance(s, string_types):
+        s = s.encode("utf-8")
+    elif isinstance(s, int):
+        s = bytes([s])
+
+    return s
+
+
+def hash_murmur(kmer, seed=MINHASH_DEFAULT_SEED):
+    "hash_murmur(string, [,seed])\n\n"
+    "Compute a hash for a string, optionally using a seed (an integer). "
+    "The current default seed is returned by hash_seed()."
+
+    return lib.hash_murmur(to_bytes(kmer), seed)
+
+
+def dotproduct(a, b, normalize=True):
+    """
+    Compute the dot product of two dictionaries {k: v} where v is
+    abundance.
+    """
+
+    if normalize:
+        norm_a = math.sqrt(sum([x * x for x in a.values()]))
+        norm_b = math.sqrt(sum([x * x for x in b.values()]))
+
+        if norm_a == 0.0 or norm_b == 0.0:
+            return 0.0
+    else:
+        norm_a = 1.0
+        norm_b = 1.0
+
+    prod = 0.0
+    for k, abundance in a.items():
+        prod += (float(abundance) / norm_a) * (b.get(k, 0) / norm_b)
+
+    return prod
+
+
+class MinHash(RustObject):
+    def __init__(
+        self,
+        n,
+        ksize,
+        is_protein=False,
+        dayhoff=False,
+        hp=False,
+        track_abundance=False,
+        seed=MINHASH_DEFAULT_SEED,
+        max_hash=0,
+        mins=None,
+        scaled=0,
+    ):
+        if max_hash and scaled:
+            raise ValueError("cannot set both max_hash and scaled")
+        elif scaled:
+            max_hash = get_max_hash_for_scaled(scaled)
+
+        if max_hash and n:
+            raise ValueError("cannot set both n and max_hash")
+
+        if not n and not (max_hash or scaled):
+            raise ValueError("cannot omit both n and scaled")
+
+        if dayhoff or hp:
+            is_protein = False
+
+        self._objptr = lib.kmerminhash_new(
+            n, ksize, is_protein, dayhoff, hp, seed, int(max_hash), track_abundance
+        )
+        self.__dealloc_func__ = lib.kmerminhash_free
+
+        if mins:
+            if track_abundance:
+                self.set_abundances(mins)
+            else:
+                self.add_many(mins)
+
+    def __copy__(self):
+        a = MinHash(
+            self.num,
+            self.ksize,
+            is_protein=self.is_protein,
+            dayhoff=self.dayhoff,
+            hp=self.hp,
+            track_abundance=self.track_abundance,
+            seed=self.seed,
+            max_hash=self.max_hash,
+        )
+        a.merge(self)
+        return a
+
+    def __getstate__(self):  # enable pickling
+        return (
+            self.num,
+            self.ksize,
+            self.is_protein,
+            self.dayhoff,
+            self.hp,
+            self.get_mins(with_abundance=self.track_abundance),
+            None,
+            self.track_abundance,
+            self.max_hash,
+            self.seed,
+        )
+
+    def __setstate__(self, tup):
+        (n, ksize, is_protein, dayhoff, hp, mins, _, track_abundance, max_hash, seed) = tup
+
+        self.__del__()
+        self._objptr = lib.kmerminhash_new(
+            n, ksize, is_protein, dayhoff, hp, seed, max_hash, track_abundance
+        )
+        if track_abundance:
+            self.set_abundances(mins)
+        else:
+            self.add_many(mins)
+
+    def __reduce__(self):
+        return (
+            MinHash,
+            (
+                self.num,
+                self.ksize,
+                self.is_protein,
+                self.dayhoff,
+                self.hp,
+                self.track_abundance,
+                self.seed,
+                self.max_hash,
+                self.get_mins(with_abundance=self.track_abundance),
+                0,
+            ),
+        )
+
+    def __eq__(self, other):
+        return self.__getstate__() == other.__getstate__()
+
+    def copy_and_clear(self):
+        a = MinHash(
+            self.num,
+            self.ksize,
+            self.is_protein,
+            self.dayhoff,
+            self.hp,
+            self.track_abundance,
+            self.seed,
+            self.max_hash,
+        )
+        return a
+
+    def add_sequence(self, sequence, force=False):
+        self._methodcall(lib.kmerminhash_add_sequence, to_bytes(sequence), force)
+
+    def add(self, kmer):
+        "Add kmer into sketch."
+        self.add_sequence(kmer)
+
+    def add_many(self, hashes):
+        "Add many hashes in at once."
+        if isinstance(hashes, MinHash):
+            self._methodcall(lib.kmerminhash_add_from, hashes._objptr)
+        else:
+            for hash in hashes:
+                self._methodcall(lib.kmerminhash_add_hash, hash)
+
+    def remove_many(self, hashes):
+        "Add many hashes in at once."
+        self._methodcall(lib.kmerminhash_remove_many, list(hashes), len(hashes))
+
+    def update(self, other):
+        "Update this estimator from all the hashes from the other."
+        self.add_many(other)
+
+    def __len__(self):
+        return self._methodcall(lib.kmerminhash_get_mins_size)
+
+    def get_mins(self, with_abundance=False):
+        size = self._methodcall(lib.kmerminhash_get_mins_size)
+        mins_ptr = self._methodcall(lib.kmerminhash_get_mins)
+
+        if with_abundance and self.track_abundance:
+            abunds_ptr = self._methodcall(lib.kmerminhash_get_abunds)
+            return dict(zip(ffi.unpack(mins_ptr, size), ffi.unpack(abunds_ptr, size)))
+        else:
+            return ffi.unpack(mins_ptr, size)
+
+    def get_hashes(self):
+        return self.get_mins()
+
+    def subtract_mins(self, other):
+        a = set(self.get_mins())
+        b = set(other.get_mins())
+        return a - b
+
+    @property
+    def seed(self):
+        return self._methodcall(lib.kmerminhash_seed)
+
+    @property
+    def num(self):
+        return self._methodcall(lib.kmerminhash_num)
+
+    @property
+    def scaled(self):
+        if self.max_hash:
+            return get_scaled_for_max_hash(self.max_hash)
+        return 0
+
+    @property
+    def is_dna(self):
+        return not (self.is_protein or self.dayhoff or self.hp)
+
+    @property
+    def is_protein(self):
+        return self._methodcall(lib.kmerminhash_is_protein)
+
+    @property
+    def dayhoff(self):
+        return self._methodcall(lib.kmerminhash_dayhoff)
+
+    @property
+    def hp(self):
+        return self._methodcall(lib.kmerminhash_hp)
+
+    @property
+    def ksize(self):
+        return self._methodcall(lib.kmerminhash_ksize)
+
+    @property
+    def max_hash(self):
+        return self._methodcall(lib.kmerminhash_max_hash)
+
+    @property
+    def track_abundance(self):
+        return self._methodcall(lib.kmerminhash_track_abundance)
+
+    @track_abundance.setter
+    def track_abundance(self, b):
+        if self.track_abundance == b:
+            return
+
+        if b is False:
+            self._methodcall(lib.kmerminhash_disable_abundance)
+        elif len(self) > 0:
+            raise RuntimeError("Can only set track_abundance=True if the MinHash is empty")
+        else:
+            self._methodcall(lib.kmerminhash_enable_abundance)
+
+    def add_hash(self, h):
+        return self._methodcall(lib.kmerminhash_add_hash, h)
+
+    def translate_codon(self, codon):
+        try:
+            return rustcall(lib.sourmash_translate_codon,
+                            to_bytes(codon)).decode('utf-8')
+        except SourmashError as e:
+            raise ValueError(e.message)
+
+    def count_common(self, other):
+        if not isinstance(other, MinHash):
+            raise TypeError("Must be a MinHash!")
+        return self._methodcall(lib.kmerminhash_count_common, other._get_objptr())
+
+    def downsample_n(self, new_num):
+        if self.num and self.num < new_num:
+            raise ValueError("new sample n is higher than current sample n")
+
+        a = MinHash(
+            new_num, self.ksize, self.is_protein, self.dayhoff, self.hp, self.track_abundance, self.seed, 0
+        )
+        if self.track_abundance:
+            a.set_abundances(self.get_mins(with_abundance=True))
+        else:
+            a.add_many(self)
+
+        return a
+
+    def downsample_max_hash(self, *others):
+        max_hashes = [x.max_hash for x in others]
+        new_max_hash = min(self.max_hash, *max_hashes)
+        new_scaled = get_scaled_for_max_hash(new_max_hash)
+
+        return self.downsample_scaled(new_scaled)
+
+    def downsample_scaled(self, new_num):
+        if self.num:
+            raise ValueError("num != 0 - cannot downsample a standard MinHash")
+
+        max_hash = self.max_hash
+        if max_hash is None:
+            raise ValueError("no max_hash available - cannot downsample")
+
+        old_scaled = get_scaled_for_max_hash(self.max_hash)
+        if old_scaled > new_num:
+            raise ValueError(
+                "new scaled {} is lower than current sample scaled {}".format(
+                    new_num, old_scaled
+                )
+            )
+
+        new_max_hash = get_max_hash_for_scaled(new_num)
+
+        a = MinHash(
+            0,
+            self.ksize,
+            self.is_protein,
+            self.dayhoff,
+            self.hp,
+            self.track_abundance,
+            self.seed,
+            new_max_hash,
+        )
+        if self.track_abundance:
+            a.set_abundances(self.get_mins(with_abundance=True))
+        else:
+            a.add_many(self)
+
+        return a
+
+    def intersection(self, other, in_common=False):
+        if not isinstance(other, MinHash):
+            raise TypeError("Must be a MinHash!")
+
+        if self.num != other.num:
+            err = "must have same num: {} != {}".format(self.num, other.num)
+            raise TypeError(err)
+
+        if in_common:
+            # TODO: copy from buffer to Python land instead,
+            # this way involves more moving data around.
+            combined_mh = self.copy_and_clear()
+            combined_mh.merge(self)
+            combined_mh.merge(other)
+
+            size = len(combined_mh)
+            common = set(self.get_mins())
+            common.intersection_update(other.get_mins())
+            common.intersection_update(combined_mh.get_mins())
+        else:
+            size = self._methodcall(lib.kmerminhash_intersection, other._get_objptr())
+            common = set()
+
+        return common, max(size, 1)
+
+    def compare(self, other):
+        if self.num != other.num:
+            err = "must have same num: {} != {}".format(self.num, other.num)
+            raise TypeError(err)
+        return self._methodcall(lib.kmerminhash_compare, other._get_objptr())
+
+    def jaccard(self, other):
+        return self.compare(other)
+
+    def similarity(self, other, ignore_abundance=False):
+        """Calculate similarity of two sketches.
+
+        If the sketches are not abundance weighted, or ignore_abundance=True,
+        compute Jaccard similarity.
+
+        If the sketches are abundance weighted, calculate a distance metric
+        based on the cosine similarity.
+
+        Note, because the term frequencies (tf-idf weights) cannot be negative,
+        the angle will never be < 0deg or > 90deg.
+
+        See https://en.wikipedia.org/wiki/Cosine_similarity
+        """
+
+        # if either signature is flat, calculate Jaccard only.
+        if not (self.track_abundance and other.track_abundance) or ignore_abundance:
+            return self.jaccard(other)
+        else:
+            # can we merge? if not, raise exception.
+            aa = copy.copy(self)
+            aa.merge(other)
+
+            a = self.get_mins(with_abundance=True)
+            b = other.get_mins(with_abundance=True)
+
+            prod = dotproduct(a, b)
+            prod = min(1.0, prod)
+
+            distance = 2 * math.acos(prod) / math.pi
+            return 1.0 - distance
+
+    def contained_by(self, other):
+        """\
+        Calculate how much of self is contained by other.
+        """
+        if not len(self):
+            return 0.0
+
+        return self.count_common(other) / len(self)
+
+    def containment_ignore_maxhash(self, other):
+        if len(self) == 0:
+            return 0.0
+
+        if not isinstance(other, MinHash):
+            raise TypeError("Must be a MinHash!")
+
+        return self._methodcall(lib.kmerminhash_containment_ignore_maxhash, other._get_objptr())
+
+    def __iadd__(self, other):
+        if not isinstance(other, MinHash):
+            raise TypeError("Must be a MinHash!")
+        self._methodcall(lib.kmerminhash_merge, other._get_objptr())
+        return self
+
+    merge = __iadd__
+
+    def set_abundances(self, values):
+        if self.track_abundance:
+            added = 0
+
+            for k, v in sorted(values.items()):
+                if not self.max_hash or k <= self.max_hash:
+                    self._methodcall(lib.kmerminhash_mins_push, k)
+                    self._methodcall(lib.kmerminhash_abunds_push, v)
+                    added += 1
+                    if self.num > 0 and added >= self.num:
+                        break
+        else:
+            raise RuntimeError(
+                "Use track_abundance=True when constructing "
+                "the MinHash to use set_abundances."
+            )
+
+    def add_protein(self, sequence):
+        ksize = self.ksize // 3
+        if len(sequence) < ksize:
+            return
+
+        aa_kmers = (sequence[i:i + ksize] for i in range(0, len(sequence) - ksize + 1))
+        if self.is_protein:
+            for aa_kmer in aa_kmers:
+                self._methodcall(
+                    lib.kmerminhash_add_word, to_bytes(aa_kmer)
+        )
+        elif self.dayhoff:
+            for aa_kmer in aa_kmers:
+                dayhoff_kmer = ''
+                for aa in aa_kmer:
+                    data = rustcall(lib.sourmash_aa_to_dayhoff, to_bytes(aa))
+                    dayhoff_letter = data.decode('utf-8')
+                    dayhoff_kmer += dayhoff_letter
+                self._methodcall(
+                    lib.kmerminhash_add_word, to_bytes(dayhoff_kmer)
+                )
+        elif self.hp:
+            for aa_kmer in aa_kmers:
+                hp_kmer = ''
+                for aa in aa_kmer:
+                    data = rustcall(lib.sourmash_aa_to_hp, to_bytes(aa))
+                    hp_letter = data.decode('utf-8')
+                    hp_kmer += hp_letter
+                self._methodcall(
+                    lib.kmerminhash_add_word, to_bytes(hp_kmer)
+                )
+        else:
+            raise ValueError("Invalid protein type")
+
+    def is_molecule_type(self, molecule):
+        if self.is_protein and molecule == 'protein':
+            return True
+        elif self.dayhoff and molecule == 'dayhoff':
+            return True
+        elif self.hp and molecule == 'hp':
+            return True
+        elif molecule.upper() == "DNA" and self.is_dna:
+            return True
+
+        return False
diff --git a/sourmash/_minhash.pyx b/sourmash/_minhash.pyx
deleted file mode 100644
index 66d6a357ee..0000000000
--- a/sourmash/_minhash.pyx
+++ /dev/null
@@ -1,512 +0,0 @@
-# -*- coding: UTF-8 -*-
-# cython: language_level=3, c_string_type=str, c_string_encoding=ascii
-
-from __future__ import unicode_literals
-
-from cython.operator cimport dereference as deref, address
-
-from libcpp cimport bool
-from libc.stdint cimport uint32_t
-
-from ._minhash cimport KmerMinHash, KmerMinAbundance, _hash_murmur
-import math
-import copy
-
-
-# default MurmurHash seed
-cdef uint32_t MINHASH_DEFAULT_SEED = 42
-
-
-def get_minhash_default_seed():
-    return MINHASH_DEFAULT_SEED
-
-
-# we use the 64-bit hash space of MurmurHash only
-cdef uint64_t MINHASH_MAX_HASH = 2**64 - 1
-
-
-def get_minhash_max_hash():
-    return MINHASH_MAX_HASH
-
-
-def get_max_hash_for_scaled(scaled):
-    if scaled == 0:
-        return 0
-    elif scaled == 1:
-        return get_minhash_max_hash()
-
-    return int(round(get_minhash_max_hash() / scaled, 0))
-
-
-def get_scaled_for_max_hash(max_hash):
-    if max_hash == 0:
-        return 0
-    return int(round(get_minhash_max_hash() / max_hash, 0))
-
-
-cdef bytes to_bytes(s):
-    # Allow for strings, bytes or int
-    # Single item of byte string = int
-    if not isinstance(s, (basestring, bytes, int)):
-        raise TypeError("Requires a string-like sequence")
-
-    if isinstance(s, unicode):
-        s = s.encode('utf-8')
-    if isinstance(s, int):
-        s = bytes([s])
-    return s
-
-
-def hash_murmur(kmer, uint32_t seed=MINHASH_DEFAULT_SEED):
-    "hash_murmur(string, [,seed])\n\n"
-    "Compute a hash for a string, optionally using a seed (an integer). "
-    "The current default seed is returned by hash_seed()."
-
-    return _hash_murmur(to_bytes(kmer), seed)
-
-
-def dotproduct(a, b, normalize=True):
-    """
-    Compute the dot product of two dictionaries {k: v} where v is
-    abundance.
-    """
-
-    if normalize:
-        norm_a = math.sqrt(sum([ x*x for x in a.values() ]))
-        norm_b = math.sqrt(sum([ x*x for x in b.values() ]))
-
-        if norm_a == 0.0 or norm_b == 0.0:
-            return 0.0
-    else:
-        norm_a = 1.0
-        norm_b = 1.0
-
-    prod = 0.
-    for k, abundance in a.items():
-        prod += (float(abundance) / norm_a) * (b.get(k, 0) / norm_b)
-
-    return prod
-
-
-cdef class MinHash(object):
-
-    def __init__(self, unsigned int n, unsigned int ksize,
-                       bool is_protein=False,
-                       bool dayhoff=False,
-                       bool hp=False,
-                       bool track_abundance=False,
-                       uint32_t seed=MINHASH_DEFAULT_SEED,
-                       HashIntoType max_hash=0,
-                       mins=None, HashIntoType scaled=0):
-        self._track_abundance = track_abundance
-
-        if max_hash and scaled:
-            raise ValueError('cannot set both max_hash and scaled')
-        elif scaled:
-            max_hash = get_max_hash_for_scaled(scaled)
-
-        if max_hash and n:
-            raise ValueError('cannot set both n and max_hash')
-
-        if not n and not (max_hash or scaled):
-            raise ValueError("cannot omit both n and scaled")
-
-        cdef KmerMinHash *mh = NULL
-        if track_abundance:
-            mh = new KmerMinAbundance(n, ksize, is_protein, dayhoff, hp, seed, max_hash)
-        else:
-            mh = new KmerMinHash(n, ksize, is_protein, dayhoff, hp, seed, max_hash)
-
-        self._this.reset(mh)
-
-        if mins:
-            if track_abundance:
-                self.set_abundances(mins)
-            else:
-                self.add_many(mins)
-
-
-    def __copy__(self):
-        a = MinHash(deref(self._this).num, deref(self._this).ksize,
-                    deref(self._this).is_protein, deref(self._this).dayhoff,
-                    deref(self._this).hp,
-                    self.track_abundance,
-                    deref(self._this).seed, deref(self._this).max_hash)
-        a.merge(self)
-        return a
-
-    def __getstate__(self):             # enable pickling
-        with_abundance = False
-        if self.track_abundance:
-            with_abundance = True
-
-        return (deref(self._this).num,
-                deref(self._this).ksize,
-                deref(self._this).is_protein,
-                deref(self._this).dayhoff,
-                deref(self._this).hp,
-                self.get_mins(with_abundance=with_abundance),
-                None, self.track_abundance, deref(self._this).max_hash,
-                deref(self._this).seed)
-
-    def __setstate__(self, tup):
-        (n, ksize, is_protein, dayhoff, hp, mins, _, track_abundance, max_hash, seed) =\
-          tup
-
-        self._track_abundance = track_abundance
-
-        cdef KmerMinHash *mh = NULL
-        if track_abundance:
-            mh = new KmerMinAbundance(n, ksize, is_protein, dayhoff, hp, seed, max_hash)
-            self._this.reset(mh)
-            self.set_abundances(mins)
-        else:
-            mh = new KmerMinHash(n, ksize, is_protein, dayhoff, hp, seed, max_hash)
-            self._this.reset(mh)
-            self.add_many(mins)
-
-    def __reduce__(self):
-        return (MinHash,
-               (deref(self._this).num,
-                deref(self._this).ksize,
-                deref(self._this).is_protein,
-                deref(self._this).dayhoff,
-                deref(self._this).hp,
-                self.track_abundance,
-                deref(self._this).seed,
-                deref(self._this).max_hash,
-                self.get_mins(with_abundance=self.track_abundance),
-                0))
-
-    def __richcmp__(self, other, op):
-        if op == 2:
-            return self.__getstate__() == other.__getstate__()
-        raise Exception("undefined comparison")
-
-    def copy_and_clear(self):
-        a = MinHash(deref(self._this).num, deref(self._this).ksize,
-                    deref(self._this).is_protein, deref(self._this).dayhoff,
-                    deref(self._this).hp, self.track_abundance,
-                    deref(self._this).seed, deref(self._this).max_hash)
-        return a
-
-    def add_sequence(self, sequence, bool force=False):
-        deref(self._this).add_sequence(to_bytes(sequence), force)
-
-    def add(self, kmer):
-        "Add kmer into sketch."
-        self.add_sequence(kmer)
-
-    def add_many(self, hashes):
-        "Add many hashes in at once."
-        for hash in hashes:
-            self.add_hash(hash)
-
-    def remove_many(self, hashes):
-        "Remove many hashes at once."
-        for hash in hashes:
-            deref(self._this).remove_hash(hash)
-
-    def update(self, other):
-        "Update this estimator from all the hashes from the other."
-        self.add_many(other.get_mins())
-
-    def __len__(self):
-        return deref(self._this).mins.size()
-
-    cpdef get_mins(self, bool with_abundance=False):
-        cdef KmerMinAbundance *mh = <KmerMinAbundance*>address(deref(self._this))
-        if with_abundance and self.track_abundance:
-            return dict(zip(mh.mins, mh.abunds))
-        else:
-            return deref(self._this).mins
-
-    def get_hashes(self):
-        return self.get_mins()
-
-    def subtract_mins(self, other):
-        a = set(self.get_mins())
-        b = set(other.get_mins())
-        return a - b
-
-    @property
-    def seed(self):
-        return deref(self._this).seed
-
-    @property
-    def num(self):
-        return deref(self._this).num
-
-    @property
-    def scaled(self):
-        if self.max_hash:
-            return get_scaled_for_max_hash(self.max_hash)
-        return 0
-
-    @property
-    def is_protein(self):
-        return deref(self._this).is_protein
-
-    @property
-    def dayhoff(self):
-        return deref(self._this).dayhoff
-
-    @property
-    def hp(self):
-        return deref(self._this).hp
-
-    @property
-    def ksize(self):
-        return deref(self._this).ksize
-
-    @property
-    def max_hash(self):
-        return deref(self._this).max_hash
-
-    @property
-    def track_abundance(self):
-        return self._track_abundance
-
-    @track_abundance.setter
-    def track_abundance(self, v):
-        cdef KmerMinHash *mh = NULL
-
-        if v == self._track_abundance:
-            return
-
-        if v is True and len(self) != 0:
-            raise RuntimeError("Can only set track_abundance=True if the MinHash is empty")
-
-        if v:
-            mh = new KmerMinAbundance(self.num, self.ksize, self.is_protein,
-                                      self.dayhoff, self.hp, self.seed, self.max_hash)
-            self._this.reset(mh)
-
-        # At this point, if we are changing from track_abundance=True to False,
-        # keep the underlying Abundance MH (to avoid copying data to a new one).
-
-        self._track_abundance = v
-
-    def add_hash(self, uint64_t h):
-        deref(self._this).add_hash(h)
-
-    def translate_codon(self, codon):
-        return deref(self._this).translate_codon(to_bytes(codon))
-
-    def count_common(self, MinHash other):
-        return deref(self._this).count_common(deref(other._this))
-
-    def downsample_n(self, new_num):
-        if self.num and self.num < new_num:
-            raise ValueError('new sample n is higher than current sample n')
-
-        a = MinHash(new_num, deref(self._this).ksize,
-                    deref(self._this).is_protein, deref(self._this).dayhoff,
-                    deref(self._this).hp,
-                    self.track_abundance,
-                    deref(self._this).seed, 0)
-        if self.track_abundance:
-            a.set_abundances(self.get_mins(with_abundance=True))
-        else:
-            a.add_many(self.get_mins())
-
-        return a
-
-    def downsample_max_hash(self, *others):
-        max_hashes = [ x.max_hash for x in others ]
-        new_max_hash = min(self.max_hash, *max_hashes)
-        new_scaled = get_scaled_for_max_hash(new_max_hash)
-
-        return self.downsample_scaled(new_scaled)
-
-    def downsample_scaled(self, new_num):
-        if self.num:
-            raise ValueError('num != 0 - cannot downsample a standard MinHash')
-
-        max_hash = self.max_hash
-        if max_hash is None:
-            raise ValueError('no max_hash available - cannot downsample')
-
-        old_scaled = get_scaled_for_max_hash(self.max_hash)
-        if old_scaled > new_num:
-            raise ValueError('new scaled {} is lower than current sample scaled {}'.format(new_num, old_scaled))
-
-        new_max_hash = get_max_hash_for_scaled(new_num)
-
-        a = MinHash(0, deref(self._this).ksize,
-                    deref(self._this).is_protein, deref(self._this).dayhoff,
-                    deref(self._this).hp,
-                    self.track_abundance,
-                    deref(self._this).seed, new_max_hash)
-        if self.track_abundance:
-            a.set_abundances(self.get_mins(with_abundance=True))
-        else:
-            a.add_many(self.get_mins())
-
-        return a
-
-    def intersection(self, MinHash other):
-        if self.num != other.num:
-            err = 'must have same num: {} != {}'.format(self.num,
-                                                            other.num)
-            raise TypeError(err)
-        else:
-            num = self.num
-
-        if self.track_abundance and other.track_abundance:
-            combined_mh = new KmerMinAbundance(num,
-                                          deref(self._this).ksize,
-                                          deref(self._this).is_protein,
-                                          deref(self._this).dayhoff,
-                                          deref(self._this).hp,
-                                          deref(self._this).seed,
-                                          deref(self._this).max_hash)
-
-        else:
-            combined_mh = new KmerMinHash(num,
-                                          deref(self._this).ksize,
-                                          deref(self._this).is_protein,
-                                          deref(self._this).dayhoff,
-                                          deref(self._this).hp,
-                                          deref(self._this).seed,
-                                          deref(self._this).max_hash)
-
-        combined_mh.merge(deref(self._this))
-        combined_mh.merge(deref(other._this))
-
-        common = set(self.get_mins())
-        common.intersection_update(other.get_mins())
-        common.intersection_update(combined_mh.mins)
-
-        size = max(combined_mh.size(), 1)
-        del combined_mh
-
-        return common, size
-
-    def compare(self, MinHash other):
-        common, size = self.intersection(other)
-        n = len(common)
-        return n / size
-
-    def jaccard(self, MinHash other):
-        return self.compare(other)
-
-    def similarity(self, other, ignore_abundance=False):
-        """\
-        Calculate similarity of two sketches.
-
-        If the sketches are not abundance weighted, or ignore_abundance=True,
-        compute Jaccard similarity.
-
-        If the sketches are abundance weighted, calculate a distance metric
-        based on the cosine similarity.
-
-        Note, because the term frequencies (tf-idf weights) cannot be negative,
-        the angle will never be < 0deg or > 90deg.
-
-        See https://en.wikipedia.org/wiki/Cosine_similarity
-        """
-
-        # if either signature is flat, calculate Jaccard only.
-        if not (self.track_abundance and other.track_abundance) or \
-          ignore_abundance:
-            return self.jaccard(other)
-        else:
-            # can we merge? if not, raise exception.
-            aa = copy.copy(self)
-            aa.merge(other)
-
-            a = self.get_mins(with_abundance=True)
-            b = other.get_mins(with_abundance=True)
-
-            prod = dotproduct(a, b)
-            prod = min(1.0, prod)
-
-            distance = 2*math.acos(prod) / math.pi
-            return 1.0 - distance
-
-    def contained_by(self, other):
-        """\
-        Calculate how much of self is contained by other.
-        """
-        if not len(self):
-            return 0.0
-        return self.count_common(other) / len(self.get_mins())
-
-    def containment_ignore_maxhash(self, MinHash other):
-        a = set(self.get_mins())
-        if not a:
-            return 0.0
-
-        b = set(other.get_mins())
-
-        overlap = a.intersection(b)
-        return float(len(overlap)) / float(len(a))
-
-    def __iadd__(self, MinHash other):
-        cdef KmerMinAbundance *mh = <KmerMinAbundance*>address(deref(self._this))
-        cdef KmerMinAbundance *other_mh = <KmerMinAbundance*>address(deref(other._this))
-
-        if self.track_abundance and other.track_abundance:
-            deref(mh).merge(deref(other_mh))
-        else:
-            deref(self._this).merge(deref(other._this))
-
-        return self
-    merge = __iadd__
-
-    cpdef set_abundances(self, dict values):
-        if self.track_abundance:
-            added = 0
-
-            for k, v in sorted(values.items()):
-                if not self.max_hash or k <= self.max_hash:
-                    deref(self._this).mins.push_back(k)
-                    (<KmerMinAbundance*>address(deref(self._this))).abunds.push_back(v)
-                    added += 1
-                    if self.num > 0 and added >= self.num:
-                        break
-        else:
-            raise RuntimeError("Use track_abundance=True when constructing "
-                               "the MinHash to use set_abundances.")
-
-    def add_protein(self, sequence):
-        cdef uint32_t ksize = deref(self._this).ksize // 3
-        if len(sequence) < ksize:
-            return
-
-        if not deref(self._this).is_protein:
-            raise ValueError("cannot add amino acid sequence to DNA MinHash!")
-
-        aa_kmers = (sequence[i:i + ksize] for i in range(0, len(sequence) - ksize + 1))
-        if not self.dayhoff and not self.hp:
-            for aa_kmer in aa_kmers:
-                deref(self._this).add_word(to_bytes(aa_kmer))
-        elif self.dayhoff:
-            for aa_kmer in aa_kmers:
-                dayhoff_kmer = ''
-                for aa in aa_kmer:
-                    dayhoff_letter = deref(self._this).aa_to_dayhoff(to_bytes(aa))
-                    dayhoff_kmer += dayhoff_letter
-                # dayhoff_kmer = ''.join( for aa in aa_kmer)
-                deref(self._this).add_word(to_bytes(dayhoff_kmer))
-        else:
-            for aa_kmer in aa_kmers:
-                hp_kmer = ''
-                for aa in aa_kmer:
-                    hp_letter = deref(self._this).aa_to_hp(to_bytes(aa))
-                    hp_kmer += hp_letter
-                # hp_kmer = ''.join( for aa in aa_kmer)
-                deref(self._this).add_word(to_bytes(hp_kmer))
-
-    def is_molecule_type(self, molecule):
-        if molecule.upper() == 'DNA' and not self.is_protein:
-            return True
-        elif self.is_protein and molecule == 'protein' and not any((self.dayhoff, self.hp)):
-            return True
-        elif self.dayhoff and molecule == 'dayhoff':
-            return True
-        elif self.hp and molecule == 'hp':
-            return True
-
-        return False
diff --git a/sourmash/exceptions.py b/sourmash/exceptions.py
new file mode 100644
index 0000000000..8254a21762
--- /dev/null
+++ b/sourmash/exceptions.py
@@ -0,0 +1,44 @@
+from ._compat import implements_to_string
+from ._lowlevel import lib
+
+
+__all__ = ['SourmashError']
+exceptions_by_code = {}
+
+
+@implements_to_string
+class SourmashError(Exception):
+    code = None
+
+    def __init__(self, msg):
+        Exception.__init__(self)
+        self.message = msg
+        self.rust_info = None
+
+    def __str__(self):
+        rv = self.message
+        if self.rust_info is not None:
+            return u'%s\n\n%s' % (rv, self.rust_info)
+        return rv
+
+
+def _make_exceptions():
+    for attr in dir(lib):
+        if not attr.startswith('SOURMASH_ERROR_CODE_'):
+            continue
+
+        class Exc(SourmashError):
+            pass
+
+        code = getattr(lib, attr)
+        if code < 100 or code > 10000:
+            Exc.__name__ = attr[20:].title().replace('_', '')
+            Exc.code = getattr(lib, attr)
+            globals()[Exc.__name__] = Exc
+            Exc.code = code
+            exceptions_by_code[code] = Exc
+            __all__.append(Exc.__name__)
+        else:
+            exceptions_by_code[code] = ValueError
+
+_make_exceptions()
diff --git a/sourmash/kmer_min_hash.hh b/sourmash/kmer_min_hash.hh
deleted file mode 100644
index 47decc8105..0000000000
--- a/sourmash/kmer_min_hash.hh
+++ /dev/null
@@ -1,648 +0,0 @@
-#ifndef KMER_MIN_HASH_HH
-#define KMER_MIN_HASH_HH
-
-#include <algorithm>
-#include <iostream>
-#include <set>
-#include <map>
-#include <queue>
-#include <exception>
-#include <string>
-
-#include "../third-party/smhasher/MurmurHash3.h"
-
-#define tbl \
-  "                                                                "\
-  /*ABCDEFGHIJKLMNOPQRSTUVWXYZ      abcdefghijklmnopqrstuvwxyz    */\
-  " TVGH FCD  M KN   YSAABW R       TVGH FCD  M KN   YSAABW R"
-
-inline uint64_t _hash_murmur(const std::string& kmer,
-                             const uint32_t seed) {
-    uint64_t out[2];
-    out[0] = 0; out[1] = 0;
-    MurmurHash3_x64_128((void *)kmer.c_str(), kmer.size(), seed, &out);
-    return out[0];
-}
-
-/**
- * @Synopsis  Unsafe hash overload. Takes a const char * 
- *            and assumes it has length ksize.
- *
- * @Param kmer The k-mer.
- * @Param ksize Length of the k-mer.
- * @Param seed Hashing seed.
- *
- * @Returns   The hash value.
- */
-inline uint64_t _hash_murmur(const char * kmer,
-                             unsigned int ksize,
-                             const uint32_t seed) {
-    uint64_t out[2];
-    out[0] = 0; out[1] = 0;
-    MurmurHash3_x64_128((void *)kmer, ksize, seed, &out);
-    return out[0];
-}
-
-typedef uint64_t HashIntoType;
-
-typedef std::vector<HashIntoType> CMinHashType;
-
-class minhash_exception : public std::exception
-{
-public:
-    explicit minhash_exception(const std::string& msg = "Generic minhash exception")
-        : _msg(msg) { }
-
-    virtual ~minhash_exception() throw() { }
-    virtual const char* what() const throw ()
-    {
-        return _msg.c_str();
-    }
-
-protected:
-    const std::string _msg;
-};
-
-// Looks like a iterator but all it does is counts push_backs
-struct Counter {
-  struct value_type {
-    template <typename T> value_type(const T &) {}
-  };
-  void push_back(const value_type &) { ++count; }
-  size_t count = 0;
-};
-
-
-class KmerMinHash
-{
-public:
-    const unsigned int num;
-    const unsigned int ksize;
-    const bool is_protein;
-    const bool dayhoff;
-    const bool hp;
-    const uint32_t seed;
-    const HashIntoType max_hash;
-    CMinHashType mins;
-
-    KmerMinHash(unsigned int n, unsigned int k, bool prot, bool dyhoff, bool hp, uint32_t s,
-                HashIntoType mx)
-        : num(n), ksize(k), is_protein(prot), dayhoff(dyhoff), hp(hp), seed(s), max_hash(mx) {
-      if (n > 0) {
-        mins.reserve(num + 1);
-      }
-      // only reserve a finite amount of space for unbounded MinHashes
-      else {
-        mins.reserve(1000);
-      }
-    };
-
-    void check_compatible(const KmerMinHash& other) {
-        if (ksize != other.ksize) {
-            throw minhash_exception("different ksizes cannot be compared");
-        }
-        if (is_protein != other.is_protein) {
-            throw minhash_exception("DNA/prot minhashes cannot be compared");
-        }
-        if (dayhoff != other.dayhoff) {
-            throw minhash_exception("DNA/prot minhashes cannot be compared");
-        }
-        if (hp != other.hp) {
-            throw minhash_exception("DNA/prot minhashes cannot be compared");
-        }
-        if (max_hash != other.max_hash) {
-            throw minhash_exception("mismatch in max_hash; comparison fail");
-        }
-        if (seed != other.seed) {
-            throw minhash_exception("mismatch in seed; comparison fail");
-        }
-    }
-
-    virtual void add_hash(const HashIntoType h) {
-      if ((max_hash and h <= max_hash) or not max_hash) {
-        if (mins.size() == 0) {
-          mins.push_back(h);
-          return;
-        }
-        else if (h <= max_hash or mins.back() > h or mins.size() < num) {
-          auto pos = std::lower_bound(std::begin(mins), std::end(mins), h);
-
-          // must still be growing, we know the list won't get too long
-          if (pos == mins.cend()) {
-            mins.push_back(h);
-          }
-          // inserting somewhere in the middle, if this value isn't already
-          // in mins store it and shrink list if needed
-          else if (*pos != h) {
-            mins.insert(pos, h);
-            if (num and mins.size() > num) {
-              mins.pop_back();
-            }
-          }
-        }
-      }
-    }
-
-    virtual void remove_hash(const HashIntoType h) {
-        auto pos = std::lower_bound(std::begin(mins), std::end(mins), h);
-        if (pos != mins.cend() and *pos == h) {
-          mins.erase(pos);
-        }
-    }
-
-    void add_word(const std::string& word) {
-        const HashIntoType hash = _hash_murmur(word, seed);
-        add_hash(hash);
-    }
-
-    /**
-     * @Synopsis  Unsafe overload: calls _hash_murmur assuming
-     *            word is length ksize.
-     *
-     * @Param word k-mer to add.
-     */
-    void add_word(const char * word, unsigned int size) {
-        const HashIntoType hash = _hash_murmur(word, size, seed);
-        add_hash(hash);
-    }
-
-    void _invalid_kmer(const std::string& kmer) {
-        std::string msg = "invalid DNA character in input k-mer: ";
-        msg += kmer;
-        throw minhash_exception(msg); 
-    }
-
-    void add_sequence(std::string& seq, bool force=false) {
-
-        if (seq.length() < ksize) {
-            return;
-        }
-
-        std::transform(seq.begin(), seq.end(), seq.begin(), ::toupper);
-
-        if (!is_protein) {
-            auto rc = _revcomp(seq);
-            for (unsigned int i = 0; i < seq.length() - ksize + 1; i++) {
-        auto fw_kmer = seq.c_str() + i;
-                auto rc_kmer = rc.c_str() + rc.length() - ksize - i;
-
-                if (! _checkdna(fw_kmer, ksize)) {
-                    if (force) {
-                        continue;
-                    } else {
-                        _invalid_kmer(fw_kmer);
-                    }
-                }
-
-                if (std::lexicographical_compare(fw_kmer,
-                                                 fw_kmer + ksize,
-                                                 rc_kmer,
-                                                 rc_kmer + ksize)) {
-                    add_word(fw_kmer, ksize);
-                } else {
-                    add_word(rc_kmer, ksize);
-                }
-            }
-        } else {                      // protein
-            std::string rc = _revcomp(seq);
-            for (unsigned int i = 0; i < 3; i++) {
-                std::string aa = _dna_to_aa(seq.substr(i, seq.length() - i));
-                unsigned int aa_ksize = int(ksize / 3);
-                std::string kmer;
-
-                for (unsigned int j = 0; j < aa.length() - aa_ksize + 1; j++) {
-                    kmer = aa.substr(j, aa_ksize);
-                    add_word(kmer);
-                }
-
-                aa = _dna_to_aa(rc.substr(i, rc.length() - i));
-                aa_ksize = int(ksize / 3);
-
-                for (unsigned int j = 0; j < aa.length() - aa_ksize + 1; j++) {
-                    kmer = aa.substr(j, aa_ksize);
-                    add_word(kmer);
-                }
-            }
-        }
-    }
-
-    std::string translate_codon(std::string& codon) {
-        std::string residue;
-
-        if (codon.length() >= 2 && codon.length() <= 3){
-            // If codon is length 2, pad with an N for ambiguous codon amino acids
-            if (codon.length() == 2) {
-                codon += "N";
-            }
-            auto translated = _codon_table.find(codon);
-
-            if (translated != _codon_table.end()) {
-                // "second" is the element mapped to by the codon
-                // Because .find returns an iterator
-                residue = translated -> second;
-            } else {
-                // Otherwise, assign the "X" or "unknown" amino acid
-                residue = "X";
-            }
-        } else if (codon.length() == 1){
-            // Then we only have one nucleotides and the amino acid is unknown
-            residue = "X";
-        } else {
-            std::string msg = "Codon is invalid length: ";
-            msg += codon;
-            throw minhash_exception(msg);
-        }
-        return residue;
-    }
-
-    std::string _dna_to_aa(const std::string& dna) {
-        std::string aa;
-        std::string codon;
-        std::string residue;
-        unsigned int dna_size = (dna.size() / 3) * 3; // floor it
-        for (unsigned int j = 0; j < dna_size; j += 3) {
-
-            codon = dna.substr(j, 3);
-
-            residue = translate_codon(codon);
-
-            // Use dayhoff encoding of amino acids
-            if (dayhoff) {
-                std::string new_letter = aa_to_dayhoff(residue);
-                aa += new_letter;
-            // Use hp encoding of amino acids
-            } else if (hp) {
-                std::string new_letter = aa_to_hp(residue);
-                aa += new_letter;
-            }
-            else {
-                aa += residue;
-            }
-
-        }
-        return aa;
-    }
-
-    /**
-     * @Synopsis  Check that a single char is a DNA base.
-     *
-     * @Param c The character.
-     *
-     * @Returns   True if valid, false otherwise.
-     */
-    bool _checkdna(const char c) const {
-        switch(c) {
-            case 'A':
-            case 'C':
-            case 'G':
-            case 'T':
-                break;
-            default:
-                return false;
-        }
-        return true;
-    }
-
-    /**
-     * @Synopsis  Safe DNA sanity check for a sequence of
-     *            arbitrary length.
-     *
-     * @Param seq The sequence.
-     *
-     * @Returns   True if valid, false otherwise.
-     */
-    bool _checkdna(const std::string& seq) const {
-
-        for (size_t i=0; i < seq.length(); ++i) {
-            if (!_checkdna(seq[i])) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    /**
-     * @Synopsis  Unsafe k-mer DNA sanity check: doesn't check length.
-     *
-     * @Param kmer k-mer to check.
-     *
-     * @Returns   true if sane; false otherwise. 
-     */
-    bool _checkdna(const char * kmer, unsigned int length) const {
-
-        for (size_t i=0; i < length; ++i) {
-            if (!_checkdna(*(kmer + i))) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    std::string _revcomp(const std::string& kmer) const {
-        std::string out = kmer;
-
-        auto from = out.begin();
-        auto to = out.end();
-
-        char c;
-        for (to--; from <= to; from++, to--) {
-            c = tbl[(int)*from];
-            *from = tbl[(int)*to];
-            *to = c;
-        }
-
-        return out;
-    }
-
-    std::string aa_to_dayhoff(const std::string& aa) const {
-        // Convert an amino acid letter to dayhoff encoding
-        std::string new_letter;
-
-        auto dayhoff_encoded = _dayhoff_table.find(aa);
-        if (dayhoff_encoded != _dayhoff_table.end()) {
-            // "second" is the element mapped to by the codon
-            // Because .find returns an iterator
-            new_letter = dayhoff_encoded -> second;
-        } else {
-            // Otherwise, assign the "X" or "unknown" amino acid
-            new_letter = "X";
-        }
-        return new_letter;
-    }
-
-    std::string aa_to_hp(const std::string& aa) const {
-        // Convert an amino acid letter to hp encoding
-        std::string new_letter;
-
-        auto hp_encoded = _hp_table.find(aa);
-        if (hp_encoded != _hp_table.end()) {
-            // "second" is the element mapped to by the codon
-            // Because .find returns an iterator
-            new_letter = hp_encoded -> second;
-        } else {
-            // Otherwise, assign the "X" or "unknown" amino acid
-            new_letter = "X";
-        }
-        return new_letter;
-    }
-
-    virtual void merge(const KmerMinHash& other) {
-        check_compatible(other);
-
-        CMinHashType merged;
-        merged.reserve(other.mins.size() + mins.size());
-        std::set_union(other.mins.begin(), other.mins.end(),
-                       mins.begin(), mins.end(),
-                       std::back_inserter(merged));
-        if (merged.size() < num or !num) {
-          mins = merged;
-        }
-        else {
-          mins = CMinHashType(std::begin(merged), std::begin(merged) + num);
-        }
-    }
-
-    virtual unsigned int count_common(const KmerMinHash& other) {
-        check_compatible(other);
-
-        Counter counter;
-        std::set_intersection(mins.begin(), mins.end(),
-                              other.mins.begin(), other.mins.end(),
-                              std::back_inserter(counter));
-        return counter.count;
-    }
-
-    virtual size_t size() {
-        return mins.size();
-    }
-
-    virtual ~KmerMinHash() throw() { }
-
-private:
-    std::map<std::string, std::string> _codon_table = {
-        {"TTT", "F"}, {"TTC", "F"},
-        {"TTA", "L"}, {"TTG", "L"},
-
-        {"TCT", "S"}, {"TCC", "S"}, {"TCA", "S"}, {"TCG", "S"}, {"TCN", "S"},
-
-        {"TAT", "Y"}, {"TAC", "Y"},
-        {"TAA", "*"}, {"TAG", "*"},
-
-        {"TGT", "C"}, {"TGC", "C"},
-        {"TGA", "*"},
-        {"TGG", "W"},
-
-        {"CTT", "L"}, {"CTC", "L"}, {"CTA", "L"}, {"CTG", "L"}, {"CTN", "L"},
-
-        {"CCT", "P"}, {"CCC", "P"}, {"CCA", "P"}, {"CCG", "P"}, {"CCN", "P"},
-
-        {"CAT", "H"}, {"CAC", "H"},
-        {"CAA", "Q"}, {"CAG", "Q"},
-
-        {"CGT", "R"}, {"CGC", "R"}, {"CGA", "R"}, {"CGG", "R"}, {"CGN", "R"},
-
-        {"ATT", "I"}, {"ATC", "I"}, {"ATA", "I"},
-        {"ATG", "M"},
-
-        {"ACT", "T"}, {"ACC", "T"}, {"ACA", "T"}, {"ACG", "T"}, {"ACN", "T"},
-
-        {"AAT", "N"}, {"AAC", "N"},
-        {"AAA", "K"}, {"AAG", "K"},
-
-        {"AGT", "S"}, {"AGC", "S"},
-        {"AGA", "R"}, {"AGG", "R"},
-
-        {"GTT", "V"}, {"GTC", "V"}, {"GTA", "V"}, {"GTG", "V"}, {"GTN", "V"},
-
-        {"GCT", "A"}, {"GCC", "A"}, {"GCA", "A"}, {"GCG", "A"}, {"GCN", "A"},
-
-        {"GAT", "D"}, {"GAC", "D"},
-        {"GAA", "E"}, {"GAG", "E"},
-
-        {"GGT", "G"}, {"GGC", "G"}, {"GGA", "G"}, {"GGG", "G"}, {"GGN", "G"}
-    };
-
-
-// Dayhoff table from
-// Peris, P., López, D., & Campos, M. (2008).
-// IgTM: An algorithm to predict transmembrane domains and topology in
-// proteins. BMC Bioinformatics, 9(1), 1029–11.
-// http://doi.org/10.1186/1471-2105-9-367
-//
-// Original source:
-// Dayhoff M. O., Schwartz R. M., Orcutt B. C. (1978).
-// A model of evolutionary change in proteins,
-// in Atlas of Protein Sequence and Structure,
-// ed Dayhoff M. O., editor.
-// (Washington, DC: National Biomedical Research Foundation; ), 345–352.
-//
-// | Amino acid    | Property              | Dayhoff |
-// |---------------|-----------------------|---------|
-// | C             | Sulfur polymerization | a       |
-// | A, G, P, S, T | Small                 | b       |
-// | D, E, N, Q    | Acid and amide        | c       |
-// | H, K, R       | Basic                 | d       |
-// | I, L, M, V    | Hydrophobic           | e       |
-// | F, W, Y       | Aromatic              | f       |
-    std::map<std::string, std::string> _dayhoff_table = {
-        {"C", "a"},
-
-        {"A", "b"}, {"G", "b"}, {"P", "b"}, {"S", "b"}, {"T", "b"},
-
-        {"D", "c"}, {"E", "c"}, {"N", "c"}, {"Q", "c"},
-
-        {"H", "d"}, {"K", "d"}, {"R", "d"},
-
-        {"I", "e"}, {"L", "e"}, {"M", "e"}, {"V", "e"},
-
-        {"F", "f"}, {"W", "f"}, {"Y", "f"}
-
-    };
-
-// HP Hydrophobic/hydrophilic mapping
-// From: Phillips, R., Kondev, J., Theriot, J. (2008). 
-// Physical Biology of the Cell. New York: Garland Science, Taylor & Francis Group. ISBN: 978-0815341635 
-
-//
-// | Amino acid                            | HP 
-// |---------------------------------------|---------|
-// | A, F, G, I, L, M, P, V, W, Y          | h       |
-// | N, C, S, T, D, E, R, H, K, Q          | p       |     
-    std::map<std::string, std::string> _hp_table = {
-      {"A", "h"}, {"F", "h"}, {"G", "h"}, {"I", "h"},
-      {"L", "h"}, {"M", "h"}, {"P", "h"}, {"V", "h"},
-      {"W", "h"}, {"Y", "h"}, {"N", "p"}, {"C", "p"},
-      {"S", "p"}, {"T", "p"}, {"D", "p"}, {"E", "p"},
-      {"R", "p"}, {"H", "p"}, {"K", "p"}, {"Q", "p"}
-    };
-
-};
-
-class KmerMinAbundance: public KmerMinHash {
- public:
-    CMinHashType abunds;
-
-    KmerMinAbundance(unsigned int n, unsigned int k, bool prot, bool dayhoff,
-                     bool hp, uint32_t seed, HashIntoType mx) :
-        KmerMinHash(n, k, prot, dayhoff, hp, seed, mx) { };
-
-    virtual void add_hash(HashIntoType h) {
-      if ((max_hash and h <= max_hash) or not max_hash) {
-        // empty? add it, if within range / no range specified.
-        if (mins.size() == 0) {
-          mins.push_back(h);
-          abunds.push_back(1);
-          return;
-        } else if (h <= max_hash or mins.back() > h or mins.size() < num) {
-          // "good" hash - within range, smaller than current entry, or
-          // still space.
-          auto pos = std::lower_bound(std::begin(mins), std::end(mins), h);
-
-          // at end -- must still be growing, we know the list won't get too
-          // long
-          if (pos == mins.cend()) {
-            mins.push_back(h);
-            abunds.push_back(1);
-          } else if (*pos != h) {
-          // didn't find hash already in mins, so
-          // inserting somewhere in the middle; shrink list if needed.
-
-            // calculate distance for use w/abunds *before* insert, as
-            // 'mins.insert' may invalidate 'pos'.
-            size_t dist = std::distance(begin(mins), pos);
-            mins.insert(pos, h);
-            abunds.insert(begin(abunds) + dist, 1);
-
-            // now too big? if so, continue.
-            if (mins.size() > num and not max_hash) {
-              mins.pop_back();
-              abunds.pop_back();
-            }
-          } else { // *pos == h - hash value already there, increment count.
-            auto p = std::distance(begin(mins), pos);
-            abunds[p] += 1;
-          }
-        }
-      }
-    }
-
-    virtual void remove_hash(const HashIntoType h) {
-        auto pos = std::lower_bound(std::begin(mins), std::end(mins), h);
-        if (pos != mins.cend() and *pos == h) {
-          mins.erase(pos);
-          size_t dist = std::distance(begin(mins), pos);
-          abunds.erase(begin(abunds) + dist);
-        }
-    }
-
-    virtual void merge(const KmerMinAbundance& other) {
-        check_compatible(other);
-
-        CMinHashType merged_mins;
-        CMinHashType merged_abunds;
-        size_t max_size = other.mins.size() + mins.size();
-
-        merged_mins.reserve(max_size);
-        merged_abunds.reserve(max_size);
-
-        auto it1_m = mins.begin();
-        auto it2_m = other.mins.begin();
-        auto out_m = std::back_inserter(merged_mins);
-
-        auto it1_a = abunds.begin();
-        auto it2_a = other.abunds.begin();
-        auto out_a = std::back_inserter(merged_abunds);
-
-        for (; it1_m != mins.end(); ++out_m, ++out_a) {
-            if (it2_m == other.mins.end()) {
-                /* we reached the end of other.mins,
-                   so just copy the remainder of mins to the output */
-                std::copy(it1_m, mins.end(), out_m);
-                std::copy(it1_a, abunds.end(), out_a);
-                break;
-            }
-            if (*it2_m < *it1_m) {
-                /* other.mins is smaller than mins,
-                   so copy it to output and advance other.mins iterators */
-                *out_m = *it2_m;
-                *out_a = *it2_a;
-                ++it2_m;
-                ++it2_a;
-            } else if (*it2_m == *it1_m) {
-                /* same value in both mins, so sums the abundances
-                   on the output and advances all iterators */
-                *out_m = *it1_m;
-                *out_a = *it1_a + *it2_a;
-                ++it1_m; ++it1_a;
-                ++it2_m; ++it2_a;
-            } else {
-                /* mins is smaller than other.mins,
-                   so copy it to output and advance the mins iterators */
-                *out_m = *it1_m;
-                *out_a = *it1_a;
-                ++it1_m;
-                ++it1_a;
-            }
-        }
-        /* we reached the end of mins/abunds,
-           so just copy the remainder of other to the output
-           (other might already be at the end, in this case nothing happens) */
-        std::copy(it2_m, other.mins.end(), out_m);
-        std::copy(it2_a, other.abunds.end(), out_a);
-
-        if (merged_mins.size() < num || !num) {
-          mins = merged_mins;
-          abunds = merged_abunds;
-        } else {
-          mins = CMinHashType(std::begin(merged_mins), std::begin(merged_mins) + num);
-          abunds = CMinHashType(std::begin(merged_abunds), std::begin(merged_abunds) + num);
-        }
-    }
-
-    virtual size_t size() {
-        return mins.size();
-    }
-
-};
-
-#endif // KMER_MIN_HASH_HH
diff --git a/sourmash/sbtmh.py b/sourmash/sbtmh.py
index 2289e0cb94..0f7b46ae9c 100644
--- a/sourmash/sbtmh.py
+++ b/sourmash/sbtmh.py
@@ -54,10 +54,11 @@ def save(self, path):
             return self.storage.save(path, buf.getvalue())
 
     def update(self, parent):
-        for v in self.data.minhash.get_mins():
+        mh = self.data.minhash
+        for v in mh.get_mins():
             parent.data.count(v)
         min_n_below = parent.metadata.get('min_n_below', sys.maxsize)
-        min_n_below = min(len(self.data.minhash), min_n_below)
+        min_n_below = min(len(mh), min_n_below)
 
         if min_n_below == 0:
             min_n_below = 1
diff --git a/sourmash/sig/__main__.py b/sourmash/sig/__main__.py
index b0d29a8efe..10c9765107 100644
--- a/sourmash/sig/__main__.py
+++ b/sourmash/sig/__main__.py
@@ -268,10 +268,13 @@ def merge(args):
                     mh.track_abundance = False
 
             try:
+                sigobj_mh = sigobj.minhash
                 if not args.flatten:
                     _check_abundance_compatibility(first_sig, sigobj)
+                else:
+                    sigobj_mh.track_abundance = False
 
-                mh.merge(sigobj.minhash)
+                mh.merge(sigobj_mh)
             except:
                 error("ERROR when merging signature '{}' ({}) from file {}",
                       sigobj.name(), sigobj.md5sum()[:8], sigfile)
diff --git a/sourmash/sourmash_args.py b/sourmash/sourmash_args.py
index 542559a9a5..e496173737 100644
--- a/sourmash/sourmash_args.py
+++ b/sourmash/sourmash_args.py
@@ -215,11 +215,12 @@ def __iter__(self):
                 self.ksizes.add(query_ksize)
                 self.moltypes.add(query_moltype)
 
-                yield filename, query, query_moltype, query_ksize
-
             if len(self.ksizes) > 1 or len(self.moltypes) > 1:
                 raise ValueError('multiple k-mer sizes/molecule types present')
 
+            for query in sl:
+                yield filename, query, query_moltype, query_ksize
+
 
 def traverse_find_sigs(dirnames, yield_all_files=False):
     for dirname in dirnames:
diff --git a/sourmash/utils.py b/sourmash/utils.py
new file mode 100644
index 0000000000..4fb2835fd8
--- /dev/null
+++ b/sourmash/utils.py
@@ -0,0 +1,78 @@
+import weakref
+
+from ._lowlevel import ffi, lib
+from .exceptions import exceptions_by_code, SourmashError
+
+attached_refs = weakref.WeakKeyDictionary()
+
+
+class RustObject(object):
+    __dealloc_func__ = None
+    _objptr = None
+    _shared = False
+
+    def __init__(self):
+        raise TypeError("Cannot instanciate %r objects" % self.__class__.__name__)
+
+    @classmethod
+    def _from_objptr(cls, ptr, shared=False):
+        rv = object.__new__(cls)
+        rv._objptr = ptr
+        rv._shared = shared
+        return rv
+
+    def _methodcall(self, func, *args):
+        return rustcall(func, self._get_objptr(), *args)
+
+    def _get_objptr(self):
+        if not self._objptr:
+            raise RuntimeError("Object is closed")
+        return self._objptr
+
+    def __del__(self):
+        if self._objptr is None or self._shared:
+            return
+        f = self.__class__.__dealloc_func__
+        if f is not None:
+            rustcall(f, self._objptr)
+            self._objptr = None
+
+
+def decode_str(s, free=False):
+    """Decodes a SourmashStr"""
+    try:
+        if s.len == 0:
+            return u""
+        return ffi.unpack(s.data, s.len).decode("utf-8", "replace")
+    finally:
+        if free:
+            lib.sourmash_str_free(ffi.addressof(s))
+
+
+def encode_str(s):
+    """Encodes a SourmashStr"""
+    rv = ffi.new("SourmashStr *")
+    if isinstance(s, text_type):
+        s = s.encode("utf-8")
+    rv.data = ffi.from_buffer(s)
+    rv.len = len(s)
+    # we have to hold a weak reference here to ensure our string does not
+    # get collected before the string is used.
+    attached_refs[rv] = s
+    return rv
+
+
+def rustcall(func, *args):
+    """Calls rust method and does some error handling."""
+    lib.sourmash_err_clear()
+    rv = func(*args)
+    err = lib.sourmash_err_get_last_code()
+    if not err:
+        return rv
+    msg = lib.sourmash_err_get_last_message()
+    cls = exceptions_by_code.get(err, SourmashError)
+    exc = cls(decode_str(msg))
+    backtrace = decode_str(lib.sourmash_err_get_backtrace())
+    if backtrace:
+        exc.rust_info = backtrace
+    raise exc
diff --git a/src/bin/smrs.rs b/src/bin/smrs.rs
index b58f2c44ee..b3fee5264d 100644
--- a/src/bin/smrs.rs
+++ b/src/bin/smrs.rs
@@ -1,3 +1,4 @@
+use std::convert::TryInto;
 use std::fs::File;
 use std::io;
 use std::path::Path;
@@ -24,6 +25,7 @@ use sourmash::index::search::{
 use sourmash::index::storage::{FSStorage, Storage};
 use sourmash::index::{Comparable, Index, MHBT};
 use sourmash::signature::{Signature, SigsTrait};
+use sourmash::sketch::minhash::HashFunctions;
 use sourmash::sketch::Sketch;
 
 pub fn index(
@@ -126,6 +128,12 @@ fn load_query_signature(
     moltype: Option<&str>,
     scaled: Option<u64>,
 ) -> Result<Query<Signature>, Error> {
+    let moltype: Option<HashFunctions> = if let Some(mol) = moltype {
+        Some(mol.try_into()?)
+    } else {
+        None
+    };
+
     let mut reader = io::BufReader::new(File::open(query)?);
     let sigs = Signature::load_signatures(&mut reader, ksize, moltype, scaled)?;
 
diff --git a/src/ffi/minhash.rs b/src/ffi/minhash.rs
index 2f84fab499..d9ec05397b 100644
--- a/src/ffi/minhash.rs
+++ b/src/ffi/minhash.rs
@@ -297,7 +297,7 @@ unsafe fn kmerminhash_enable_abundance(ptr: *mut KmerMinHash) -> Result<()> {
         &mut *ptr
     };
 
-    if mh.mins.is_empty() {
+    if !mh.mins.is_empty() {
       return Err(SourmashError::NonEmptyMinHash { message: "track_abundance=True".into()}.into());
     }
 
diff --git a/src/ffi/signature.rs b/src/ffi/signature.rs
index 56e147ebe3..2d6984347b 100644
--- a/src/ffi/signature.rs
+++ b/src/ffi/signature.rs
@@ -1,3 +1,4 @@
+use std::convert::TryInto;
 use std::ffi::CStr;
 use std::io;
 use std::os::raw::c_char;
@@ -8,7 +9,7 @@ use serde_json;
 
 use crate::ffi::utils::SourmashStr;
 use crate::signature::Signature;
-use crate::sketch::minhash::KmerMinHash;
+use crate::sketch::minhash::{HashFunctions, KmerMinHash};
 use crate::sketch::Sketch;
 
 // Signature methods
@@ -233,12 +234,11 @@ unsafe fn signatures_load_path(ptr: *const c_char,
         CStr::from_ptr(ptr)
     };
 
-    let moltype = {
-        if select_moltype.is_null() {
+    let moltype: Option<HashFunctions> = if select_moltype.is_null() {
           None
         } else {
-          Some(CStr::from_ptr(select_moltype).to_str()?)
-        }
+          let mol = CStr::from_ptr(select_moltype).to_str()?;
+          Some(mol.try_into()?)
     };
 
     // TODO: implement ignore_md5sum
@@ -274,12 +274,11 @@ unsafe fn signatures_load_buffer(ptr: *const c_char,
         slice::from_raw_parts(ptr as *mut u8, insize)
     };
 
-    let moltype = {
-        if select_moltype.is_null() {
+    let moltype: Option<HashFunctions> = if select_moltype.is_null() {
           None
         } else {
-          Some(CStr::from_ptr(select_moltype).to_str()?)
-        }
+          let mol = CStr::from_ptr(select_moltype).to_str()?;
+          Some(mol.try_into()?)
     };
 
     let k = match ksize {
diff --git a/src/index/bigsi.rs b/src/index/bigsi.rs
index 6fac529a05..141211f584 100644
--- a/src/index/bigsi.rs
+++ b/src/index/bigsi.rs
@@ -155,6 +155,7 @@ impl<'a> Index<'a> for BIGSI<Signature> {
 
 #[cfg(test)]
 mod test {
+    use std::convert::TryInto;
     use std::fs::File;
     use std::io::BufReader;
     use std::path::PathBuf;
@@ -179,8 +180,13 @@ mod test {
         filename.push("tests/test-data/.sbt.v3/60f7e23c24a8d94791cc7a8680c493f9");
 
         let mut reader = BufReader::new(File::open(filename).unwrap());
-        let sigs =
-            Signature::load_signatures(&mut reader, Some(31), Some("DNA".into()), None).unwrap();
+        let sigs = Signature::load_signatures(
+            &mut reader,
+            Some(31),
+            Some("DNA".try_into().unwrap()),
+            None,
+        )
+        .unwrap();
         let sig_data = sigs[0].clone();
 
         let leaf: SigStore<_> = sig_data.into();
diff --git a/src/index/sbt/mhbt.rs b/src/index/sbt/mhbt.rs
index b26975f6de..565468a278 100644
--- a/src/index/sbt/mhbt.rs
+++ b/src/index/sbt/mhbt.rs
@@ -153,14 +153,15 @@ impl ReadData<Nodegraph> for Node<Nodegraph> {
 
 #[cfg(test)]
 mod test {
+    use std::convert::TryInto;
     use std::fs::File;
     use std::io::{BufReader, Seek, SeekFrom};
     use std::path::PathBuf;
     use std::rc::Rc;
-    use tempfile;
 
     use assert_matches::assert_matches;
     use lazy_init::Lazy;
+    use tempfile;
 
     use super::Factory;
 
@@ -207,8 +208,13 @@ mod test {
         filename.push("tests/test-data/.sbt.v3/60f7e23c24a8d94791cc7a8680c493f9");
 
         let mut reader = BufReader::new(File::open(filename).unwrap());
-        let sigs =
-            Signature::load_signatures(&mut reader, Some(31), Some("DNA".into()), None).unwrap();
+        let sigs = Signature::load_signatures(
+            &mut reader,
+            Some(31),
+            Some("DNA".try_into().unwrap()),
+            None,
+        )
+        .unwrap();
         let sig_data = sigs[0].clone();
 
         let data = Lazy::new();
@@ -290,7 +296,12 @@ mod test {
         filename.push("tests/test-data/.sbt.v3/60f7e23c24a8d94791cc7a8680c493f9");
 
         let mut reader = BufReader::new(File::open(filename)?);
-        let sigs = Signature::load_signatures(&mut reader, Some(31), Some("DNA".into()), None)?;
+        let sigs = Signature::load_signatures(
+            &mut reader,
+            Some(31),
+            Some("DNA".try_into().unwrap()),
+            None,
+        )?;
         let sig_data = sigs[0].clone();
 
         let leaf: SigStore<_> = sig_data.into();
diff --git a/src/signature.rs b/src/signature.rs
index 07823a43c3..914c962f3e 100644
--- a/src/signature.rs
+++ b/src/signature.rs
@@ -14,6 +14,7 @@ use typed_builder::TypedBuilder;
 
 use crate::errors::SourmashError;
 use crate::index::storage::ToWriter;
+use crate::sketch::minhash::HashFunctions;
 use crate::sketch::Sketch;
 
 pub trait SigsTrait {
@@ -156,7 +157,7 @@ impl Signature {
     pub fn load_signatures<R>(
         buf: &mut R,
         ksize: Option<usize>,
-        moltype: Option<&str>,
+        moltype: Option<HashFunctions>,
         _scaled: Option<u64>,
     ) -> Result<Vec<Signature>, Error>
     where
@@ -190,9 +191,7 @@ impl Signature {
 
                             match moltype {
                                 Some(x) => {
-                                    if (x.to_lowercase() == "dna" && !mh.is_protein())
-                                        || (x.to_lowercase() == "protein" && mh.is_protein())
-                                    {
+                                    if mh.hash_function() == x {
                                         return true;
                                     }
                                 }
@@ -208,7 +207,7 @@ impl Signature {
 
                             match moltype {
                                 Some(x) => {
-                                    if x.to_lowercase() == "dna" {
+                                    if x == HashFunctions::murmur64_DNA {
                                         return true;
                                     } else {
                                         // TODO: draff only supports dna for now
@@ -285,6 +284,7 @@ impl PartialEq for Signature {
 
 #[cfg(test)]
 mod test {
+    use std::convert::TryInto;
     use std::fs::File;
     use std::io::BufReader;
     use std::path::PathBuf;
@@ -297,8 +297,13 @@ mod test {
         filename.push("tests/test-data/.sbt.v3/60f7e23c24a8d94791cc7a8680c493f9");
 
         let mut reader = BufReader::new(File::open(filename).unwrap());
-        let sigs =
-            Signature::load_signatures(&mut reader, Some(31), Some("DNA".into()), None).unwrap();
+        let sigs = Signature::load_signatures(
+            &mut reader,
+            Some(31),
+            Some("DNA".try_into().unwrap()),
+            None,
+        )
+        .unwrap();
         let _sig_data = sigs[0].clone();
         // TODO: check sig_data
     }
diff --git a/src/sketch/minhash.rs b/src/sketch/minhash.rs
index 427ccb977d..088d2a4fd3 100644
--- a/src/sketch/minhash.rs
+++ b/src/sketch/minhash.rs
@@ -1,14 +1,14 @@
-use serde::de::{Deserialize, Deserializer};
-use serde::ser::{Serialize, SerializeStruct, Serializer};
-use serde_derive::Deserialize;
-
 use std::cmp::Ordering;
 use std::collections::HashMap;
+use std::convert::TryFrom;
 use std::iter::{Iterator, Peekable};
 use std::str;
 
 use failure::Error;
 use lazy_static::lazy_static;
+use serde::de::{Deserialize, Deserializer};
+use serde::ser::{Serialize, SerializeStruct, Serializer};
+use serde_derive::Deserialize;
 
 use crate::_hash_murmur;
 use crate::errors::SourmashError;
@@ -27,6 +27,20 @@ pub enum HashFunctions {
     murmur64_hp = 4,
 }
 
+impl TryFrom<&str> for HashFunctions {
+    type Error = Error;
+
+    fn try_from(moltype: &str) -> Result<Self, Self::Error> {
+        match moltype.to_lowercase().as_ref() {
+            "dna" => Ok(HashFunctions::murmur64_DNA),
+            "dayhoff" => Ok(HashFunctions::murmur64_dayhoff),
+            "hp" => Ok(HashFunctions::murmur64_hp),
+            "protein" => Ok(HashFunctions::murmur64_protein),
+            _ => unimplemented!(),
+        }
+    }
+}
+
 #[cfg_attr(all(target_arch = "wasm32", target_vendor = "unknown"), wasm_bindgen)]
 #[derive(Debug, Clone, PartialEq)]
 pub struct KmerMinHash {
@@ -177,6 +191,10 @@ impl KmerMinHash {
         self.hash_function == HashFunctions::murmur64_protein
     }
 
+    fn is_dna(&self) -> bool {
+        self.hash_function == HashFunctions::murmur64_DNA
+    }
+
     pub fn seed(&self) -> u64 {
         self.seed
     }
@@ -523,8 +541,7 @@ impl SigsTrait for KmerMinHash {
             .map(|&x| (x as char).to_ascii_uppercase() as u8)
             .collect();
         if sequence.len() >= (self.ksize as usize) {
-            if !self.is_protein() {
-                // dna
+            if self.is_dna() {
                 for kmer in sequence.windows(self.ksize as usize) {
                     if _checkdna(kmer) {
                         let rc = revcomp(kmer);
diff --git a/tests/minhash.rs b/tests/minhash.rs
index af0853e69f..8b38d5b8ac 100644
--- a/tests/minhash.rs
+++ b/tests/minhash.rs
@@ -73,6 +73,6 @@ fn dayhoff() {
     a.add_sequence(b"ACTGAC", false).unwrap();
     b.add_sequence(b"ACTGAC", false).unwrap();
 
-    assert_eq!(a.size(), 1);
+    assert_eq!(a.size(), 2);
     assert_eq!(b.size(), 2);
 }
diff --git a/tests/smrs_cmd.rs b/tests/smrs_cmd.rs
index c203099158..54ff329286 100644
--- a/tests/smrs_cmd.rs
+++ b/tests/smrs_cmd.rs
@@ -2,7 +2,6 @@ use std::fs;
 use std::process::Command;
 
 use assert_cmd::prelude::*;
-use predicates::prelude::*;
 use predicates::str::contains;
 use tempfile::TempDir;
 
diff --git a/tests/test__minhash.py b/tests/test__minhash.py
index e994fcfde2..1c18a57bed 100644
--- a/tests/test__minhash.py
+++ b/tests/test__minhash.py
@@ -42,12 +42,17 @@
 import pytest
 
 import sourmash
-from sourmash._minhash import (MinHash, hash_murmur, dotproduct,
-                                   get_scaled_for_max_hash,
-                                   get_max_hash_for_scaled)
-from . import sourmash_tst_utils as utils
+from sourmash._minhash import (
+    MinHash,
+    hash_murmur,
+    dotproduct,
+    get_scaled_for_max_hash,
+    get_max_hash_for_scaled,
+)
 from sourmash import signature
 
+from . import sourmash_tst_utils as utils
+
 # add:
 # * get default params from Python
 # * keyword args for minhash constructor
@@ -69,6 +74,7 @@ def test_basic_dna(track_abundance):
     print(a, b)
     assert a == b
     assert len(b) == 1
+    assert a[0] == b[0] == 12415348535738636339
 
 
 def test_div_zero(track_abundance):
@@ -95,12 +101,12 @@ def test_bytes_dna(track_abundance):
     mh = MinHash(1, 4, track_abundance=track_abundance)
     mh.add_sequence('ATGC')
     mh.add_sequence(b'ATGC')
-    mh.add_sequence(u'ATGC')
+    mh.add_sequence('ATGC')
     a = mh.get_mins()
 
     mh.add_sequence('GCAT')             # this will not get added; hash > ATGC
     mh.add_sequence(b'GCAT')             # this will not get added; hash > ATGC
-    mh.add_sequence(u'GCAT')             # this will not get added; hash > ATGC
+    mh.add_sequence('GCAT')             # this will not get added; hash > ATGC
     b = mh.get_mins()
 
     print(a, b)
@@ -112,7 +118,7 @@ def test_bytes_protein_dayhoff(track_abundance, dayhoff):
     # verify that we can hash protein/aa sequences
     mh = MinHash(10, 6, True, dayhoff=dayhoff, hp=False, track_abundance=track_abundance)
     mh.add_protein('AGYYG')
-    mh.add_protein(u'AGYYG')
+    mh.add_protein('AGYYG')
     mh.add_protein(b'AGYYG')
 
     assert len(mh.get_mins()) == 4
@@ -254,6 +260,13 @@ def test_max_hash_conversion():
     assert new_scaled == SCALED
 
 
+def test_max_hash_and_scaled_zero():
+    max_hash = get_max_hash_for_scaled(0)
+    new_scaled = get_scaled_for_max_hash(0)
+    assert max_hash == new_scaled
+    assert max_hash == 0
+
+
 def test_max_hash_and_scaled_error(track_abundance):
     # test behavior when supplying both max_hash and scaled
     with pytest.raises(ValueError):
@@ -369,6 +382,28 @@ def test_compare_1(track_abundance):
     assert b.compare(b) == 1.0
 
 
+def test_intersection_errors(track_abundance):
+    a = MinHash(20, 10, track_abundance=track_abundance)
+    b = MinHash(20, 10, track_abundance=track_abundance)
+    c = MinHash(30, 10, track_abundance=track_abundance)
+
+    a.add_sequence("TGCCGCCCAGCA")
+    b.add_sequence("TGCCGCCCAGCA")
+
+    common = set(a.get_mins())
+    combined_size = 3
+
+    intersection, size = a.intersection(b, in_common=False)
+    assert intersection == set()
+    assert combined_size == size
+
+    with pytest.raises(TypeError):
+        a.intersection(set())
+
+    with pytest.raises(TypeError):
+        a.intersection(c)
+
+
 def test_intersection_1(track_abundance):
     a = MinHash(20, 10, track_abundance=track_abundance)
     b = MinHash(20, 10, track_abundance=track_abundance)
@@ -379,38 +414,38 @@ def test_intersection_1(track_abundance):
     common = set(a.get_mins())
     combined_size = 3
 
-    intersection, size = a.intersection(b)
+    intersection, size = a.intersection(b, in_common=True)
     assert intersection == common
     assert combined_size == size
 
-    intersection, size = b.intersection(b)
+    intersection, size = b.intersection(b, in_common=True)
     assert intersection == common
     assert combined_size == size
 
-    intersection, size = b.intersection(a)
+    intersection, size = b.intersection(a, in_common=True)
     assert intersection == common
     assert combined_size == size
 
-    intersection, size = a.intersection(a)
+    intersection, size = a.intersection(a, in_common=True)
     assert intersection == common
     assert combined_size == size
 
     # add same sequence again
     b.add_sequence('TGCCGCCCAGCA')
 
-    intersection, size = a.intersection(b)
+    intersection, size = a.intersection(b, in_common=True)
     assert intersection == common
     assert combined_size == size
 
-    intersection, size = b.intersection(b)
+    intersection, size = b.intersection(b, in_common=True)
     assert intersection == common
     assert combined_size == size
 
-    intersection, size = b.intersection(a)
+    intersection, size = b.intersection(a, in_common=True)
     assert intersection == common
     assert combined_size == size
 
-    intersection, size = a.intersection(a)
+    intersection, size = a.intersection(a, in_common=True)
     assert intersection == common
     assert combined_size == size
 
@@ -420,18 +455,18 @@ def test_intersection_1(track_abundance):
     new_in_common = set(a.get_mins()).intersection(set(b.get_mins()))
     new_combined_size = 8
 
-    intersection, size = a.intersection(b)
+    intersection, size = a.intersection(b, in_common=True)
     assert intersection == new_in_common
     assert size == new_combined_size
 
-    intersection, size = b.intersection(a)
+    intersection, size = b.intersection(a, in_common=True)
     assert intersection == new_in_common
     assert size == new_combined_size
 
-    intersection, size = a.intersection(a)
+    intersection, size = a.intersection(a, in_common=True)
     assert intersection == set(a.get_mins())
 
-    intersection, size = b.intersection(b)
+    intersection, size = b.intersection(b, in_common=True)
     assert intersection == set(b.get_mins())
 
 
@@ -510,6 +545,20 @@ def test_mh_count_common_diff_ksize(track_abundance):
         a.count_common(b)
 
 
+def test_mh_count_common_notmh(track_abundance):
+    a = MinHash(20, 5, track_abundance=track_abundance)
+    b = set()
+
+    with pytest.raises(TypeError):
+        a.count_common(b)
+
+
+def test_mh_downsample_n_error(track_abundance):
+    a = MinHash(20, 10, track_abundance=track_abundance)
+    with pytest.raises(ValueError):
+        a.downsample_n(30)
+
+
 def test_mh_asymmetric(track_abundance):
     a = MinHash(20, 10, track_abundance=track_abundance)
     for i in range(0, 40, 2):
@@ -531,6 +580,12 @@ def test_mh_asymmetric(track_abundance):
     assert b.compare(a) == 0.5
 
 
+def test_mh_merge_typeerror(track_abundance):
+    a = MinHash(20, 10, track_abundance=track_abundance)
+    with pytest.raises(TypeError):
+        a.merge(set())
+
+
 def test_mh_merge(track_abundance):
     # test merging two identically configured minhashes
     a = MinHash(20, 10, track_abundance=track_abundance)
@@ -596,7 +651,7 @@ def test_mh_merge_check_length(track_abundance):
         b.add_hash(i)
 
     c = a.merge(b)
-    assert(len(c.get_mins()) == 20)
+    assert len(c.get_mins()) == 20
 
 
 def test_mh_merge_check_length2(track_abundance):
@@ -612,7 +667,7 @@ def test_mh_merge_check_length2(track_abundance):
     b.add_hash(4)
 
     c = a.merge(b)
-    assert(len(c.get_mins()) == 3)
+    assert len(c.get_mins()) == 3
 
 
 def test_mh_asymmetric_merge(track_abundance):
diff --git a/tests/test_rustobj.py b/tests/test_rustobj.py
new file mode 100644
index 0000000000..4be7a0e1ee
--- /dev/null
+++ b/tests/test_rustobj.py
@@ -0,0 +1,18 @@
+import pytest
+
+from sourmash.utils import RustObject
+from sourmash._minhash import to_bytes
+
+
+def test_rustobj_init():
+    with pytest.raises(TypeError):
+        RustObject()
+
+
+def test_to_bytes():
+    with pytest.raises(TypeError):
+        to_bytes([9882])
+
+    assert to_bytes(98) == bytes([98])
+    assert to_bytes("abc") == b"abc"
+    assert to_bytes(b"abc") == b"abc"
diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py
index e9c4a5ecc5..c9073d4e4e 100644
--- a/tests/test_sourmash.py
+++ b/tests/test_sourmash.py
@@ -67,7 +67,7 @@ def test_do_serial_compare(c):
 
     cmp_outfile = c.output('cmp')
     assert os.path.exists(cmp_outfile)
-    cmp_out = numpy.load(cmp_outfile.encode('utf-8'))
+    cmp_out = numpy.load(cmp_outfile)
 
     sigs = []
     for fn in testsigs:
diff --git a/third-party/.gitignore b/third-party/.gitignore
deleted file mode 100644
index 2ced0cd228..0000000000
--- a/third-party/.gitignore
+++ /dev/null
@@ -1,6 +0,0 @@
-zlib/example
-zlib/example64
-zlib/examplesh
-zlib/minigzip
-zlib/minigzip64
-zlib/minigzipsh
diff --git a/third-party/smhasher/MurmurHash3.cc b/third-party/smhasher/MurmurHash3.cc
deleted file mode 100644
index 758a230d5d..0000000000
--- a/third-party/smhasher/MurmurHash3.cc
+++ /dev/null
@@ -1,340 +0,0 @@
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-// Note - The x86 and x64 versions do _not_ produce the same results, as the
-// algorithms are optimized for their respective platforms. You can still
-// compile and run any of them on any platform, but your performance with the
-// non-native version will be less than optimal.
-
-#include "MurmurHash3.h"
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define FORCE_INLINE	__forceinline
-
-#include <stdlib.h>
-
-#define ROTL32(x,y)	_rotl(x,y)
-#define ROTL64(x,y)	_rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x)
-
-// Other compilers
-
-#else	// defined(_MSC_VER)
-
-#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && GNUC_MINOR >= 4))
-/* gcc version >= 4.4 4.1 = RHEL 5, 4.4 = RHEL 6. Don't inline for RHEL 5 gcc which is 4.1*/
-#define FORCE_INLINE inline __attribute__((always_inline))
-#else
-#define FORCE_INLINE 
-#endif
-
-inline uint32_t rotl32 ( uint32_t x, int8_t r )
-{
-  return (x << r) | (x >> (32 - r));
-}
-
-inline uint64_t rotl64 ( uint64_t x, int8_t r )
-{
-  return (x << r) | (x >> (64 - r));
-}
-
-#define	ROTL32(x,y)	rotl32(x,y)
-#define ROTL64(x,y)	rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-// Block read - if your platform needs to do endian-swapping or can only
-// handle aligned reads, do the conversion here
-
-FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
-{
-  return p[i];
-}
-
-FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
-{
-  return p[i];
-}
-
-//-----------------------------------------------------------------------------
-// Finalization mix - force all bits of a hash block to avalanche
-
-FORCE_INLINE uint32_t fmix ( uint32_t h )
-{
-  h ^= h >> 16;
-  h *= 0x85ebca6b;
-  h ^= h >> 13;
-  h *= 0xc2b2ae35;
-  h ^= h >> 16;
-
-  return h;
-}
-
-//----------
-
-FORCE_INLINE uint64_t fmix ( uint64_t k )
-{
-  k ^= k >> 33;
-  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
-  k ^= k >> 33;
-  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
-  k ^= k >> 33;
-
-  return k;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_32 ( const void * key, int len,
-                          uint32_t seed, void * out )
-{
-  const uint8_t * data = (const uint8_t*)key;
-  const int nblocks = len / 4;
-
-  uint32_t h1 = seed;
-
-  const uint32_t c1 = 0xcc9e2d51;
-  const uint32_t c2 = 0x1b873593;
-
-  //----------
-  // body
-
-  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
-
-  for(int i = -nblocks; i; i++)
-  {
-    uint32_t k1 = getblock(blocks,i);
-
-    k1 *= c1;
-    k1 = ROTL32(k1,15);
-    k1 *= c2;
-    
-    h1 ^= k1;
-    h1 = ROTL32(h1,13); 
-    h1 = h1*5+0xe6546b64;
-  }
-
-  //----------
-  // tail
-
-  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
-
-  uint32_t k1 = 0;
-
-  switch(len & 3)
-  {
-  case 3: k1 ^= tail[2] << 16;
-  case 2: k1 ^= tail[1] << 8;
-  case 1: k1 ^= tail[0];
-          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
-  };
-
-  //----------
-  // finalization
-
-  h1 ^= len;
-
-  h1 = fmix(h1);
-
-  *(uint32_t*)out = h1;
-} 
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_128 ( const void * key, const int len,
-                           uint32_t seed, void * out )
-{
-  const uint8_t * data = (const uint8_t*)key;
-  const int nblocks = len / 16;
-
-  uint32_t h1 = seed;
-  uint32_t h2 = seed;
-  uint32_t h3 = seed;
-  uint32_t h4 = seed;
-
-  const uint32_t c1 = 0x239b961b; 
-  const uint32_t c2 = 0xab0e9789;
-  const uint32_t c3 = 0x38b34ae5; 
-  const uint32_t c4 = 0xa1e38b93;
-
-  //----------
-  // body
-
-  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
-
-  for(int i = -nblocks; i; i++)
-  {
-    uint32_t k1 = getblock(blocks,i*4+0);
-    uint32_t k2 = getblock(blocks,i*4+1);
-    uint32_t k3 = getblock(blocks,i*4+2);
-    uint32_t k4 = getblock(blocks,i*4+3);
-
-    k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
-
-    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
-
-    k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
-
-    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
-
-    k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
-
-    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
-
-    k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
-
-    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
-  }
-
-  //----------
-  // tail
-
-  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
-
-  uint32_t k1 = 0;
-  uint32_t k2 = 0;
-  uint32_t k3 = 0;
-  uint32_t k4 = 0;
-
-  switch(len & 15)
-  {
-  case 15: k4 ^= tail[14] << 16;
-  case 14: k4 ^= tail[13] << 8;
-  case 13: k4 ^= tail[12] << 0;
-           k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
-
-  case 12: k3 ^= tail[11] << 24;
-  case 11: k3 ^= tail[10] << 16;
-  case 10: k3 ^= tail[ 9] << 8;
-  case  9: k3 ^= tail[ 8] << 0;
-           k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
-
-  case  8: k2 ^= tail[ 7] << 24;
-  case  7: k2 ^= tail[ 6] << 16;
-  case  6: k2 ^= tail[ 5] << 8;
-  case  5: k2 ^= tail[ 4] << 0;
-           k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
-
-  case  4: k1 ^= tail[ 3] << 24;
-  case  3: k1 ^= tail[ 2] << 16;
-  case  2: k1 ^= tail[ 1] << 8;
-  case  1: k1 ^= tail[ 0] << 0;
-           k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
-  };
-
-  //----------
-  // finalization
-
-  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
-
-  h1 += h2; h1 += h3; h1 += h4;
-  h2 += h1; h3 += h1; h4 += h1;
-
-  h1 = fmix(h1);
-  h2 = fmix(h2);
-  h3 = fmix(h3);
-  h4 = fmix(h4);
-
-  h1 += h2; h1 += h3; h1 += h4;
-  h2 += h1; h3 += h1; h4 += h1;
-
-  ((uint32_t*)out)[0] = h1;
-  ((uint32_t*)out)[1] = h2;
-  ((uint32_t*)out)[2] = h3;
-  ((uint32_t*)out)[3] = h4;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x64_128 ( const void * key, const int len,
-                           const uint32_t seed, void * out )
-{
-  const uint8_t * data = (const uint8_t*)key;
-  const int nblocks = len / 16;
-
-  uint64_t h1 = seed;
-  uint64_t h2 = seed;
-
-  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
-  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
-
-  //----------
-  // body
-
-  const uint64_t * blocks = (const uint64_t *)(data);
-
-  for(int i = 0; i < nblocks; i++)
-  {
-    uint64_t k1 = getblock(blocks,i*2+0);
-    uint64_t k2 = getblock(blocks,i*2+1);
-
-    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
-
-    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
-
-    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
-
-    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
-  }
-
-  //----------
-  // tail
-
-  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
-
-  uint64_t k1 = 0;
-  uint64_t k2 = 0;
-
-  switch(len & 15)
-  {
-  case 15: k2 ^= uint64_t(tail[14]) << 48;
-  case 14: k2 ^= uint64_t(tail[13]) << 40;
-  case 13: k2 ^= uint64_t(tail[12]) << 32;
-  case 12: k2 ^= uint64_t(tail[11]) << 24;
-  case 11: k2 ^= uint64_t(tail[10]) << 16;
-  case 10: k2 ^= uint64_t(tail[ 9]) << 8;
-  case  9: k2 ^= uint64_t(tail[ 8]) << 0;
-           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
-
-  case  8: k1 ^= uint64_t(tail[ 7]) << 56;
-  case  7: k1 ^= uint64_t(tail[ 6]) << 48;
-  case  6: k1 ^= uint64_t(tail[ 5]) << 40;
-  case  5: k1 ^= uint64_t(tail[ 4]) << 32;
-  case  4: k1 ^= uint64_t(tail[ 3]) << 24;
-  case  3: k1 ^= uint64_t(tail[ 2]) << 16;
-  case  2: k1 ^= uint64_t(tail[ 1]) << 8;
-  case  1: k1 ^= uint64_t(tail[ 0]) << 0;
-           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
-  };
-
-  //----------
-  // finalization
-
-  h1 ^= len; h2 ^= len;
-
-  h1 += h2;
-  h2 += h1;
-
-  h1 = fmix(h1);
-  h2 = fmix(h2);
-
-  h1 += h2;
-  h2 += h1;
-
-  ((uint64_t*)out)[0] = h1;
-  ((uint64_t*)out)[1] = h2;
-}
-
-//-----------------------------------------------------------------------------
-
diff --git a/third-party/smhasher/MurmurHash3.h b/third-party/smhasher/MurmurHash3.h
deleted file mode 100644
index 54e9d3f9e3..0000000000
--- a/third-party/smhasher/MurmurHash3.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#ifndef _MURMURHASH3_H_
-#define _MURMURHASH3_H_
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-typedef unsigned char uint8_t;
-typedef unsigned long uint32_t;
-typedef unsigned __int64 uint64_t;
-
-// Other compilers
-
-#else	// defined(_MSC_VER)
-
-#include <stdint.h>
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
-
-void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
-
-void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
-
-//-----------------------------------------------------------------------------
-
-#endif // _MURMURHASH3_H_