diff --git a/.ci/install_cargo.sh b/.ci/install_cargo.sh index d0696d7fdf..94ec2b9beb 100755 --- a/.ci/install_cargo.sh +++ b/.ci/install_cargo.sh @@ -2,3 +2,4 @@ curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable export PATH="$HOME/.cargo/bin:$PATH" rustc -V +rustup target add aarch64-apple-darwin diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 64b653f0c1..d15fb8a27e 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -20,6 +20,7 @@ jobs: linux-ppc64le, linux-s390x, macos-x86_64, + macos-arm64, ] include: - build: linux-x86_64 @@ -41,7 +42,11 @@ jobs: - build: macos-x86_64 os: macos-latest arch: x86_64 - macos_target: 'MACOSX_DEPLOYMENT_TARGET=10.11' + macos_target: 'MACOSX_DEPLOYMENT_TARGET=10.11 CARGO_BUILD_TARGET=x86_64-apple-darwin' + - build: macos-arm64 + os: macos-latest + arch: arm64 + macos_target: 'MACOSX_DEPLOYMENT_TARGET=11 CARGO_BUILD_TARGET=aarch64-apple-darwin' fail-fast: false steps: @@ -63,12 +68,7 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v2.2.2 env: - CIBW_BUILD: "cp39-*" - CIBW_SKIP: "*-win32 *-manylinux_i686 *-musllinux_ppc64le *-musllinux_s390x" - CIBW_BEFORE_BUILD: 'source .ci/install_cargo.sh' - CIBW_ENVIRONMENT: 'PATH="$HOME/.cargo/bin:$PATH"' CIBW_ENVIRONMENT_MACOS: ${{ matrix.macos_target }} - CIBW_BUILD_VERBOSITY: 3 CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: ${{ matrix.arch }} diff --git a/.github/workflows/khmer.yml b/.github/workflows/khmer.yml deleted file mode 100644 index 9385647255..0000000000 --- a/.github/workflows/khmer.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: khmer compatibility tests - -on: - push: - branches: [latest] - pull_request: - schedule: - - cron: "0 0 * * *" # daily - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v1 - - - name: Set up Python 3.10 - uses: actions/setup-python@v1 - with: - python-version: "3.10" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install tox - - - name: Run tests with latest released khmer - run: tox -e khmer,coverage - - - name: Run tests with khmer master branch - run: tox -e khmer_master,coverage diff --git a/Cargo.lock b/Cargo.lock index ef043fcddb..d634a4a0ef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,12 +2,24 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "Inflector" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" + [[package]] name = "adler" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + [[package]] name = "assert_matches" version = "1.5.0" @@ -635,6 +647,30 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +[[package]] +name = "ouroboros" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f31a3b678685b150cba82b702dcdc5e155893f63610cf388d30cd988d4ca2bf" +dependencies = [ + "aliasable", + "ouroboros_macro", + "stable_deref_trait", +] + +[[package]] +name = "ouroboros_macro" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "084fd65d5dd8b3772edccb5ffd1e4b7eba43897ecd0f9401e330e8c542959408" +dependencies = [ + "Inflector", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "piz" version = "0.4.0" @@ -998,6 +1034,7 @@ dependencies = [ "nohash-hasher", "num-iter", "once_cell", + "ouroboros", "piz", "primal-check", "proptest", @@ -1015,6 +1052,12 @@ dependencies = [ "web-sys", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" diff --git a/Makefile b/Makefile index 6e168e261d..10d8c1ec04 100644 --- a/Makefile +++ b/Makefile @@ -32,6 +32,7 @@ include/sourmash.h: src/core/src/lib.rs \ src/core/src/ffi/nodegraph.rs \ src/core/src/ffi/index/mod.rs \ src/core/src/ffi/index/revindex.rs \ + src/core/src/ffi/storage.rs \ src/core/src/errors.rs cd src/core && \ RUSTC_BOOTSTRAP=1 cbindgen -c cbindgen.toml . 
-o ../../$@ diff --git a/asv.conf.json b/asv.conf.json index c188ad0c04..a036fd2298 100644 --- a/asv.conf.json +++ b/asv.conf.json @@ -6,7 +6,6 @@ "branches": ["latest"], "dvcs": "git", "environment_type": "virtualenv", - "pythons": ["3.10"], "env_dir": ".asv/env", "results_dir": ".asv/results", "html_dir": ".asv/html", diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 481cb76b43..375981299e 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -1,6 +1,10 @@ +import os import random +from pathlib import Path +from tempfile import NamedTemporaryFile +from sourmash.sbt_storage import ZipStorage from sourmash.minhash import MinHash @@ -139,3 +143,60 @@ class PeakmemMinAbundanceSuite(PeakmemMinHashSuite): def setup(self): PeakmemMinHashSuite.setup(self) self.mh = MinHash(500, 21, track_abundance=True) + +#################### + +class TimeZipStorageSuite: + + def setup(self): + import zipfile + self.zipfile = NamedTemporaryFile() + + with zipfile.ZipFile(self.zipfile, mode='w', + compression=zipfile.ZIP_STORED) as storage: + for i in range(100_000): + # just so we have lots of entries + storage.writestr(str(i), b"0") + # one big-ish entry + storage.writestr("sig1", b"9" * 1_000_000) + + def time_load_from_zipstorage(self): + with ZipStorage(self.zipfile.name) as storage: + for i in range(20): + storage.load("sig1") + + def time_load_small_from_zipstorage(self): + with ZipStorage(self.zipfile.name) as storage: + for i in range(20): + storage.load("99999") + + def teardown(self): + self.zipfile.close() + + +class PeakmemZipStorageSuite: + def setup(self): + import zipfile + self.zipfile = NamedTemporaryFile() + + with zipfile.ZipFile(self.zipfile, mode='w', + compression=zipfile.ZIP_STORED) as storage: + for i in range(100_000): + # just so we have lots of entries + storage.writestr(str(i), b"0") + # one big-ish entry + storage.writestr("sig1", b"9" * 1_000_000) + + + def peakmem_load_from_zipstorage(self): + with ZipStorage(self.zipfile.name) as storage: + for i in range(20): + storage.load("sig1") + + def peakmem_load_small_from_zipstorage(self): + with ZipStorage(self.zipfile.name) as storage: + for i in range(20): + storage.load("99999") + + def teardown(self): + self.zipfile.close() diff --git a/doc/environment.yml b/doc/environment.yml index 3864f514bd..f3840b2dce 100644 --- a/doc/environment.yml +++ b/doc/environment.yml @@ -3,4 +3,4 @@ channels: - defaults dependencies: - rust - - python =3.7 + - python =3.8 diff --git a/flake.lock b/flake.lock index f86273ccb8..8e41c96a5e 100644 --- a/flake.lock +++ b/flake.lock @@ -49,11 +49,11 @@ ] }, "locked": { - "lastModified": 1639947939, - "narHash": "sha256-pGsM8haJadVP80GFq4xhnSpNitYNQpaXk4cnA796Cso=", + "lastModified": 1648544490, + "narHash": "sha256-EoBDcccV70tfz2LAs5lK0BjC7en5mzUVlgLsd5E6DW4=", "owner": "nix-community", "repo": "naersk", - "rev": "2fc8ce9d3c025d59fee349c1f80be9785049d653", + "rev": "e30ef9a5ce9b3de8bb438f15829c50f9525ca730", "type": "github" }, "original": { @@ -64,11 +64,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1645937171, - "narHash": "sha256-n9f9GZBNMe8UMhcgmmaXNObkH01jjgp7INMrUgBgcy4=", + "lastModified": 1648219316, + "narHash": "sha256-Ctij+dOi0ZZIfX5eMhgwugfvB+WZSrvVNAyAuANOsnQ=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "22dc22f8cedc58fcb11afe1acb08e9999e78be9c", + "rev": "30d3d79b7d3607d56546dd2a6b49e156ba0ec634", "type": "github" }, "original": { @@ -150,11 +150,11 @@ ] }, "locked": { - "lastModified": 1645928338, - "narHash": 
"sha256-pNbkG19Nb4QTNRCIWwxv06JKKJNCUrDzgRrriEd7W1A=", + "lastModified": 1648866882, + "narHash": "sha256-yMs/RKA9pX47a03zupmQp8/RLRmKzdSDk+h5Yt7K9xU=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "4f6e6588b07427cd8ddc99b664bf0fab02799804", + "rev": "7c90e17cd7c0b9e81d5b23f78b482088ac9961d1", "type": "github" }, "original": { @@ -165,11 +165,11 @@ }, "utils": { "locked": { - "lastModified": 1644229661, - "narHash": "sha256-1YdnJAsNy69bpcjuoKdOYQX0YxZBiCYZo4Twxerqv7k=", + "lastModified": 1648297722, + "narHash": "sha256-W+qlPsiZd8F3XkzXOzAoR+mpFqzm3ekQkJNa+PIh1BQ=", "owner": "numtide", "repo": "flake-utils", - "rev": "3cecb5b042f7f209c56ffd8371b2711a290ec797", + "rev": "0f8662f1319ad6abf89b3380dd2722369fc51ade", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 9ec1cd8300..ba080af204 100644 --- a/flake.nix +++ b/flake.nix @@ -77,6 +77,20 @@ DYLD_LIBRARY_PATH = "${self.packages.${system}.lib}/lib"; NO_BUILD = "1"; }; + docker = + let + bin = self.defaultPackage.${system}; + in + pkgs.dockerTools.buildLayeredImage { + name = bin.pname; + tag = bin.version; + contents = [ bin ]; + + config = { + Cmd = [ "/bin/sourmash" ]; + WorkingDir = "/"; + }; + }; }; defaultPackage = self.packages.${system}.sourmash; diff --git a/include/sourmash.h b/include/sourmash.h index de422efcf5..1f116e681b 100644 --- a/include/sourmash.h +++ b/include/sourmash.h @@ -58,6 +58,8 @@ typedef struct SourmashSearchResult SourmashSearchResult; typedef struct SourmashSignature SourmashSignature; +typedef struct SourmashZipStorage SourmashZipStorage; + /** * Represents a string. */ @@ -456,4 +458,23 @@ SourmashStr sourmash_str_from_cstr(const char *s); char sourmash_translate_codon(const char *codon); +SourmashStr **zipstorage_filenames(const SourmashZipStorage *ptr, uintptr_t *size); + +void zipstorage_free(SourmashZipStorage *ptr); + +SourmashStr **zipstorage_list_sbts(const SourmashZipStorage *ptr, uintptr_t *size); + +const uint8_t *zipstorage_load(const SourmashZipStorage *ptr, + const char *path_ptr, + uintptr_t insize, + uintptr_t *size); + +SourmashZipStorage *zipstorage_new(const char *ptr, uintptr_t insize); + +SourmashStr zipstorage_path(const SourmashZipStorage *ptr); + +void zipstorage_set_subdir(SourmashZipStorage *ptr, const char *path_ptr, uintptr_t insize); + +SourmashStr zipstorage_subdir(const SourmashZipStorage *ptr); + #endif /* SOURMASH_H_INCLUDED */ diff --git a/pyproject.toml b/pyproject.toml index eafbab8578..abe31ec81e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,3 +19,10 @@ include_trailing_comma = true force_grid_wrap = 0 line_length = 88 known_first_party = ["sourmash"] + +[tool.cibuildwheel] +build = "cp39-*" +skip = "*-win32 *-manylinux_i686 *-musllinux_ppc64le *-musllinux_s390x" +before-build = "source .ci/install_cargo.sh" +environment = { PATH="$HOME/.cargo/bin:$PATH" } +build-verbosity = 3 diff --git a/setup.cfg b/setup.cfg index bd3aeea86d..c959dc4949 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,8 +20,9 @@ classifiers = Operating System :: POSIX :: Linux Operating System :: MacOS :: MacOS X Programming Language :: Rust - Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 Topic :: Scientific/Engineering :: Bio-Informatics project_urls = Documentation = https://sourmash.readthedocs.io @@ -42,7 +43,7 @@ install_requires = scipy deprecation>=2.0.6 cachetools>=4,<5 -python_requires = >=3.7 +python_requires = >=3.8 [bdist_wheel] universal = 1 
diff --git a/setup.py b/setup.py index 9384aba4b4..e31e09fd1f 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ NO_BUILD = os.environ.get("NO_BUILD") == "1" -def find_dylib(name, paths): +def find_dylib_no_build(name, paths): to_find = None if sys.platform == 'darwin': to_find = f'lib{name}.dylib' @@ -25,6 +25,15 @@ def find_dylib(name, paths): raise LookupError('dylib %r not found' % name) +def find_dylib(build, target): + cargo_target = os.environ.get("CARGO_BUILD_TARGET") + if cargo_target: + in_path = "target/%s/%s" % (cargo_target, target) + else: + in_path = "target/%s" % target + return build.find_dylib("sourmash", in_path=in_path) + + def build_native(spec): cmd = ["cargo", "build", "--manifest-path", "src/core/Cargo.toml", @@ -41,7 +50,7 @@ def build_native(spec): header_filename = lambda: "include/sourmash.h" else: build = spec.add_external_build(cmd=cmd, path=".") - dylib=lambda: build.find_dylib("sourmash", in_path="target/%s" % target) + dylib = lambda: find_dylib(build, target) header_filename=lambda: build.find_header("sourmash.h", in_path="include") rtld_flags = ["NOW"] diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index 736e506c20..43ce78c0c8 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -47,6 +47,7 @@ twox-hash = "1.6.0" vec-collections = "0.3.4" piz = "0.4.0" memmap2 = "0.5.0" +ouroboros = "0.15.0" [dev-dependencies] assert_matches = "1.3.0" diff --git a/src/core/benches/compute.rs b/src/core/benches/compute.rs index 9083bd910c..3e1bee87d1 100644 --- a/src/core/benches/compute.rs +++ b/src/core/benches/compute.rs @@ -76,5 +76,75 @@ fn add_sequence(c: &mut Criterion) { }); } -criterion_group!(compute, add_sequence); +fn add_sequence_protein(c: &mut Criterion) { + let mut cp = ComputeParameters::default(); + cp.set_protein(true); + cp.set_dna(false); + cp.set_scaled(200); + cp.set_ksizes(vec![30]); + let template_sig = Signature::from_params(&cp); + + let mut data: Vec<u8> = vec![]; + let (mut f, _) = niffler::from_path("../../tests/test-data/genome-s10.fa.gz").unwrap(); + let _ = f.read_to_end(&mut data); + + let data = data.repeat(10); + + let data_upper = data.to_ascii_uppercase(); + let data_lower = data.to_ascii_lowercase(); + let data_errors: Vec<u8> = data + .iter() + .enumerate() + .map(|(i, x)| if i % 89 == 1 { b'N' } else { *x }) + .collect(); + + let mut group = c.benchmark_group("add_sequence_protein"); + group.sample_size(10); + + group.bench_function("valid", |b| { + b.iter(|| { + let fasta_data = Cursor::new(data_upper.clone()); + let mut sig = template_sig.clone(); + let mut parser = parse_fastx_reader(fasta_data).unwrap(); + while let Some(rec) = parser.next() { + sig.add_protein(&rec.unwrap().seq()).unwrap(); + } + }); + }); + + group.bench_function("lowercase", |b| { + b.iter(|| { + let fasta_data = Cursor::new(data_lower.clone()); + let mut sig = template_sig.clone(); + let mut parser = parse_fastx_reader(fasta_data).unwrap(); + while let Some(rec) = parser.next() { + sig.add_protein(&rec.unwrap().seq()).unwrap(); + } + }); + }); + + group.bench_function("invalid kmers", |b| { + b.iter(|| { + let fasta_data = Cursor::new(data_errors.clone()); + let mut sig = template_sig.clone(); + let mut parser = parse_fastx_reader(fasta_data).unwrap(); + while let Some(rec) = parser.next() { + sig.add_protein(&rec.unwrap().seq()).unwrap(); + } + }); + }); + + group.bench_function("force with valid kmers", |b| { + b.iter(|| { + let fasta_data = Cursor::new(data_upper.clone()); + let mut sig = template_sig.clone(); + let mut parser = 
parse_fastx_reader(fasta_data).unwrap(); + while let Some(rec) = parser.next() { + sig.add_protein(&rec.unwrap().seq()).unwrap(); + } + }); + }); +} + +criterion_group!(compute, add_sequence, add_sequence_protein); criterion_main!(compute); diff --git a/src/core/src/ffi/mod.rs b/src/core/src/ffi/mod.rs index e9f276d5e2..a67de37176 100644 --- a/src/core/src/ffi/mod.rs +++ b/src/core/src/ffi/mod.rs @@ -12,6 +12,7 @@ pub mod index; pub mod minhash; pub mod nodegraph; pub mod signature; +pub mod storage; use std::ffi::CStr; use std::os::raw::c_char; diff --git a/src/core/src/ffi/storage.rs b/src/core/src/ffi/storage.rs new file mode 100644 index 0000000000..98eca095b2 --- /dev/null +++ b/src/core/src/ffi/storage.rs @@ -0,0 +1,140 @@ +use std::os::raw::c_char; +use std::slice; + +use crate::ffi::utils::{ForeignObject, SourmashStr}; +use crate::prelude::*; +use crate::storage::ZipStorage; + +pub struct SourmashZipStorage; + +impl ForeignObject for SourmashZipStorage { + type RustObject = ZipStorage; +} + +ffi_fn! { +unsafe fn zipstorage_new(ptr: *const c_char, insize: usize) -> Result<*mut SourmashZipStorage> { + let path = { + assert!(!ptr.is_null()); + let path = slice::from_raw_parts(ptr as *mut u8, insize); + std::str::from_utf8(path)? + }; + let zipstorage = ZipStorage::from_file(path)?; + + Ok(SourmashZipStorage::from_rust(zipstorage)) +} +} + +#[no_mangle] +pub unsafe extern "C" fn zipstorage_free(ptr: *mut SourmashZipStorage) { + SourmashZipStorage::drop(ptr); +} + +ffi_fn! { +unsafe fn zipstorage_load(ptr: *const SourmashZipStorage, + path_ptr: *const c_char, + insize: usize, + size: *mut usize) -> Result<*const u8> { + + let storage = SourmashZipStorage::as_rust(ptr); + + let path = { + assert!(!path_ptr.is_null()); + let path = slice::from_raw_parts(path_ptr as *mut u8, insize); + std::str::from_utf8(path)? + }; + + let buffer = storage.load(path)?; + + let b = buffer.into_boxed_slice(); + *size = b.len(); + + Ok(Box::into_raw(b) as *const u8) +} +} + +ffi_fn! { +unsafe fn zipstorage_list_sbts( + ptr: *const SourmashZipStorage, + size: *mut usize, +) -> Result<*mut *mut SourmashStr> { + let storage = SourmashZipStorage::as_rust(ptr); + + let sbts = storage.list_sbts()?; + + // FIXME: use the ForeignObject trait, maybe define new method there... + let ptr_sigs: Vec<*mut SourmashStr> = sbts + .into_iter() + .map(|x| Box::into_raw(Box::new(SourmashStr::from_string(x))) as *mut SourmashStr) + .collect(); + + let b = ptr_sigs.into_boxed_slice(); + *size = b.len(); + + Ok(Box::into_raw(b) as *mut *mut SourmashStr) +} +} + +ffi_fn! { +unsafe fn zipstorage_filenames( + ptr: *const SourmashZipStorage, + size: *mut usize, +) -> Result<*mut *mut SourmashStr> { + let storage = SourmashZipStorage::as_rust(ptr); + + let files = storage.filenames()?; + + // FIXME: use the ForeignObject trait, maybe define new method there... + let ptr_sigs: Vec<*mut SourmashStr> = files + .into_iter() + .map(|x| Box::into_raw(Box::new(SourmashStr::from_string(x))) as *mut SourmashStr) + .collect(); + + let b = ptr_sigs.into_boxed_slice(); + *size = b.len(); + + Ok(Box::into_raw(b) as *mut *mut SourmashStr) +} +} + +ffi_fn! { +unsafe fn zipstorage_set_subdir( + ptr: *mut SourmashZipStorage, + path_ptr: *const c_char, + insize: usize, +) -> Result<()> { + let storage = SourmashZipStorage::as_rust_mut(ptr); + + let path = { + assert!(!path_ptr.is_null()); + let path = slice::from_raw_parts(path_ptr as *mut u8, insize); + std::str::from_utf8(path)? + }; + + storage.set_subdir(path.to_string()); + Ok(()) +} +} + +ffi_fn! 
{ +unsafe fn zipstorage_path(ptr: *const SourmashZipStorage) -> Result<SourmashStr> { + let storage = SourmashZipStorage::as_rust(ptr); + + if let Some(ref path) = storage.path() { + Ok(path.clone().into()) + } else { + Ok("".into()) + } +} +} + +ffi_fn! { +unsafe fn zipstorage_subdir(ptr: *const SourmashZipStorage) -> Result<SourmashStr> { + let storage = SourmashZipStorage::as_rust(ptr); + + if let Some(ref path) = storage.subdir() { + Ok(path.clone().into()) + } else { + Ok("".into()) + } +} +} diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 3121537130..0c6a2676bf 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -179,6 +179,7 @@ pub struct SeqToHashes { prot_configured: bool, aa_seq: Vec<u8>, + translate_iter_step: usize, } impl SeqToHashes { @@ -222,17 +223,26 @@ impl SeqToHashes { dna_last_position_check: 0, prot_configured: false, aa_seq: Vec::new(), + translate_iter_step: 0, } } } +/* +Iterator that returns a kmer hash for all modes except translate. +In translate mode: + - all the frames are processed at once and converted to hashes. + - all the hashes are stored in `hashes_buffer`. + - after processing all the kmers, `translate_iter_step` is incremented + per iteration to iterate over all the indices of the `hashes_buffer`. + - the iterator is exhausted once `translate_iter_step` == `hashes_buffer.len()`. +More info: https://github.com/sourmash-bio/sourmash/pull/1946 +*/ + impl Iterator for SeqToHashes { type Item = Result<u64, Error>; fn next(&mut self) -> Option<Self::Item> { - // TODO: Remove the hashes buffer - // Priority for flushing the hashes buffer - if (self.kmer_index < self.max_index) || !self.hashes_buffer.is_empty() { // Processing DNA or Translated DNA if !self.is_protein { @@ -290,18 +300,17 @@ let hash = crate::_hash_murmur(std::cmp::min(kmer, krc), self.seed); self.kmer_index += 1; Some(Ok(hash)) - } else if self.hashes_buffer.is_empty() { + } else if self.hashes_buffer.is_empty() && self.translate_iter_step == 0 { // Processing protein by translating DNA - // TODO: make it a real iterator not a buffer + // TODO: Implement iterator over frames instead of hashes_buffer. 
- // Three frames - for i in 0..3 { + for frame_number in 0..3 { let substr: Vec<u8> = self .sequence .iter() .cloned() - .skip(i) - .take(self.sequence.len() - i) + .skip(frame_number) + .take(self.sequence.len() - frame_number) .collect(); let aa = to_aa( @@ -320,8 +329,8 @@ .dna_rc .iter() .cloned() - .skip(i) - .take(self.dna_rc.len() - i) + .skip(frame_number) + .take(self.dna_rc.len() - frame_number) .collect(); let aa_rc = to_aa( &rc_substr, @@ -335,11 +344,16 @@ self.hashes_buffer.push(hash); }); } - self.kmer_index = self.max_index; - Some(Ok(self.hashes_buffer.remove(0))) + Some(Ok(0)) } else { - let first_element: u64 = self.hashes_buffer.remove(0); - Some(Ok(first_element)) + if self.translate_iter_step == self.hashes_buffer.len() { + self.hashes_buffer.clear(); + self.kmer_index = self.max_index; + return Some(Ok(0)); + } + let curr_idx = self.translate_iter_step; + self.translate_iter_step += 1; + Some(Ok(self.hashes_buffer[curr_idx])) } } else { // Processing protein diff --git a/src/core/src/storage.rs b/src/core/src/storage.rs index 598f47c7b6..a2269bd6e7 100644 --- a/src/core/src/storage.rs +++ b/src/core/src/storage.rs @@ -1,6 +1,8 @@ +use std::collections::BTreeMap; +use std::ffi::OsStr; use std::fs::{DirBuilder, File}; use std::io::{BufReader, BufWriter, Read, Write}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; use serde::{Deserialize, Serialize}; @@ -46,6 +48,12 @@ impl Storage for InnerStorage { pub enum StorageError { #[error("Path can't be empty")] EmptyPathError, + + #[error("Path not found: {0}")] + PathNotFoundError(String), + + #[error("Error reading data from {0}")] + DataReadError(String), } #[derive(Serialize, Deserialize)] @@ -155,48 +163,80 @@ impl Storage for FSStorage { } } -pub struct ZipStorage<'a> { +#[ouroboros::self_referencing] +pub struct ZipStorage { mapping: Option<memmap2::Mmap>, - archive: Option<piz::ZipArchive<'a>>, - //metadata: piz::read::DirectoryContents<'a>, -} -fn load_from_archive<'a>(archive: &'a piz::ZipArchive<'a>, path: &str) -> Result<Vec<u8>, Error> { - use piz::read::FileTree; + #[borrows(mapping)] + #[covariant] + archive: piz::ZipArchive<'this>, - // FIXME error - let tree = piz::read::as_tree(archive.entries()).map_err(|_| StorageError::EmptyPathError)?; - // FIXME error - let entry = tree - .lookup(path) - .map_err(|_| StorageError::EmptyPathError)?; + subdir: Option<String>, + path: Option<String>, - // FIXME error - let mut reader = BufReader::new( - archive - .read(entry) - .map_err(|_| StorageError::EmptyPathError)?, - ); - let mut contents = Vec::new(); - reader.read_to_end(&mut contents)?; + #[borrows(archive)] + #[covariant] + metadata: Metadata<'this>, +} - Ok(contents) +pub type Metadata<'a> = BTreeMap<&'a OsStr, &'a piz::read::FileMetadata<'a>>; + +fn lookup<'a, P: AsRef<Path>>( + metadata: &'a Metadata, + path: P, +) -> Result<&'a piz::read::FileMetadata<'a>, Error> { + let path = path.as_ref(); + metadata + .get(&path.as_os_str()) + .ok_or_else(|| StorageError::PathNotFoundError(path.to_str().unwrap().into()).into()) + .map(|entry| *entry) +} + +fn find_subdirs<'a>(archive: &'a piz::ZipArchive<'a>) -> Result<Option<String>, Error> { + let subdirs: Vec<_> = archive + .entries() + .iter() + .filter(|entry| entry.is_dir()) + .collect(); + if subdirs.len() == 1 { + Ok(Some( + subdirs[0] + .path + .to_str() + .expect("Error converting path") + .into(), + )) + } else { + Ok(None) + } } -impl<'a> Storage for ZipStorage<'a> { +impl Storage for ZipStorage { fn save(&self, _path: &str, _content: 
&[u8]) -> Result { unimplemented!(); } fn load(&self, path: &str) -> Result, Error> { - if let Some(archive) = &self.archive { - load_from_archive(archive, path) - } else { - //FIXME - let archive = piz::ZipArchive::new((&self.mapping.as_ref()).unwrap()) - .map_err(|_| StorageError::EmptyPathError)?; - load_from_archive(&archive, path) - } + let metadata = self.borrow_metadata(); + + let entry = lookup(metadata, path).or_else(|_| { + if let Some(subdir) = self.borrow_subdir() { + lookup(metadata, subdir.to_owned() + path) + .map_err(|_| StorageError::PathNotFoundError(path.into())) + } else { + Err(StorageError::PathNotFoundError(path.into())) + } + })?; + + let mut reader = BufReader::new( + self.borrow_archive() + .read(entry) + .map_err(|_| StorageError::DataReadError(path.into()))?, + ); + let mut contents = Vec::new(); + reader.read_to_end(&mut contents)?; + + Ok(contents) } fn args(&self) -> StorageArgs { @@ -204,40 +244,68 @@ impl<'a> Storage for ZipStorage<'a> { } } -impl<'a> ZipStorage<'a> { - pub fn new(location: &str) -> Result { +impl ZipStorage { + pub fn from_file(location: &str) -> Result { let zip_file = File::open(location)?; let mapping = unsafe { memmap2::Mmap::map(&zip_file)? }; - //FIXME - //let archive = piz::ZipArchive::new(&mapping).map_err(|_| StorageError::EmptyPathError)?; + let mut storage = ZipStorageBuilder { + mapping: Some(mapping), + archive_builder: |mapping: &Option| { + piz::ZipArchive::new(mapping.as_ref().unwrap()).unwrap() + }, + metadata_builder: |archive: &piz::ZipArchive| { + archive + .entries() + .iter() + .map(|entry| (entry.path.as_os_str(), entry)) + .collect() + }, + subdir: None, + path: Some(location.to_owned()), + } + .build(); - //FIXME - // let tree = - // piz::read::as_tree(archive.entries()).map_err(|_| StorageError::EmptyPathError)?; + let subdir = find_subdirs(storage.borrow_archive())?; + storage.with_mut(|fields| *fields.subdir = subdir); - Ok(Self { - mapping: Some(mapping), - archive: None, - //metadata: tree, - }) - } - - pub fn from_slice(mapping: &'a [u8]) -> Result { - //FIXME - let archive = piz::ZipArchive::new(mapping).map_err(|_| StorageError::EmptyPathError)?; - - //FIXME - //let entries: Vec<_> = archive.entries().iter().map(|x| x.to_owned()).collect(); - //let tree = - // piz::read::as_tree(entries.as_slice()).map_err(|_| StorageError::EmptyPathError)?; - - Ok(Self { - archive: Some(archive), - mapping: None, - /* metadata: archive - .as_tree() - .map_err(|_| StorageError::EmptyPathError)?, */ - }) + Ok(storage) + } + + pub fn path(&self) -> Option { + self.borrow_path().clone() + } + + pub fn subdir(&self) -> Option { + self.borrow_subdir().clone() + } + + pub fn set_subdir(&mut self, path: String) { + self.with_mut(|fields| *fields.subdir = Some(path)) + } + + pub fn list_sbts(&self) -> Result, Error> { + Ok(self + .borrow_archive() + .entries() + .iter() + .filter_map(|entry| { + let path = entry.path.to_str().expect("Error converting path"); + if path.ends_with(".sbt.json") { + Some(path.into()) + } else { + None + } + }) + .collect()) + } + + pub fn filenames(&self) -> Result, Error> { + Ok(self + .borrow_archive() + .entries() + .iter() + .map(|entry| entry.path.to_str().expect("Error converting path").into()) + .collect()) } } diff --git a/src/core/tests/storage.rs b/src/core/tests/storage.rs index 202ddd3fe3..5a60e02fcc 100644 --- a/src/core/tests/storage.rs +++ b/src/core/tests/storage.rs @@ -1,4 +1,3 @@ -use std::fs::File; use std::path::PathBuf; use sourmash::storage::{Storage, ZipStorage}; @@ -8,7 +7,7 @@ 
fn zipstorage_load_file() -> Result<(), Box<dyn std::error::Error>> { let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); filename.push("../../tests/test-data/v6.sbt.zip"); - let zs = ZipStorage::new(filename.to_str().unwrap())?; + let zs = ZipStorage::from_file(filename.to_str().unwrap())?; let data = zs.load("v6.sbt.json")?; @@ -19,19 +18,27 @@ fn zipstorage_load_file() -> Result<(), Box<dyn std::error::Error>> { } #[test] -fn zipstorage_load_slice() -> Result<(), Box<dyn std::error::Error>> { +fn zipstorage_load_manifest() -> Result<(), Box<dyn std::error::Error>> { let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - filename.push("../../tests/test-data/v6.sbt.zip"); + filename.push("../../tests/test-data/prot/protein.sbt.zip"); - let zip_file = File::open(filename)?; - let mapping = unsafe { memmap2::Mmap::map(&zip_file)? }; + let zs = ZipStorage::from_file(filename.to_str().unwrap())?; - let zs = ZipStorage::from_slice(&mapping)?; + let _data = zs.load("protein.manifest.csv").expect("error loading file"); - let data = zs.load("v6.sbt.json")?; + Ok(()) +} - let description: serde_json::Value = serde_json::from_slice(&data[..])?; - assert_eq!(description["version"], 6); +#[test] +fn zipstorage_list_sbts() -> Result<(), Box<dyn std::error::Error>> { + let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + filename.push("../../tests/test-data/v6.sbt.zip"); + + let zs = ZipStorage::from_file(filename.to_str().unwrap())?; + + let sbts = zs.list_sbts()?; + + assert_eq!(sbts.len(), 1); Ok(()) } diff --git a/src/sourmash/cli/utils.py b/src/sourmash/cli/utils.py index 2d3941918c..2063fd09de 100644 --- a/src/sourmash/cli/utils.py +++ b/src/sourmash/cli/utils.py @@ -17,20 +17,20 @@ def add_moltype_args(parser): parser.add_argument( '--dayhoff', dest='dayhoff', action='store_true', - help='build Dayhoff-encoded amino acid signatures' + help='choose Dayhoff-encoded amino acid signatures' ) parser.add_argument( '--no-dayhoff', dest='dayhoff', action='store_false', - help='do not build Dayhoff-encoded amino acid signatures') + help='do not choose Dayhoff-encoded amino acid signatures') parser.set_defaults(dayhoff=False) parser.add_argument( '--hp', '--hydrophobic-polar', dest='hp', action='store_true', - help='build hydrophobic-polar-encoded amino acid signatures' + help='choose hydrophobic-polar-encoded amino acid signatures' ) parser.add_argument( '--no-hp', '--no-hydrophobic-polar', dest='hp', action='store_false', - help='do not build hydrophobic-polar-encoded amino acid signatures') + help='do not choose hydrophobic-polar-encoded amino acid signatures') parser.set_defaults(hp=False) parser.add_argument( diff --git a/src/sourmash/command_compute.py b/src/sourmash/command_compute.py index be0d87db00..b028664b53 100644 --- a/src/sourmash/command_compute.py +++ b/src/sourmash/command_compute.py @@ -137,10 +137,15 @@ def __init__(self, args): def __call__(self): args = self.args - params = ComputeParameters(args.ksizes, args.seed, args.protein, - args.dayhoff, args.hp, args.dna, - args.num_hashes, - args.track_abundance, args.scaled) + params = ComputeParameters(ksizes=args.ksizes, + seed=args.seed, + protein=args.protein, + dayhoff=args.dayhoff, + hp=args.hp, + dna=args.dna, + num_hashes=args.num_hashes, + track_abundance=args.track_abundance, + scaled=args.scaled) sig = SourmashSignature.from_params(params) return [sig] @@ -326,7 +331,17 @@ def save_sigs_to_location(siglist, save_sig): class ComputeParameters(RustObject): __dealloc_func__ = lib.computeparams_free - def __init__(self, ksizes, seed, protein, dayhoff, hp, dna, num_hashes, track_abundance, scaled): + def 
__init__(self, + *, + ksizes=(21, 31, 51), + seed=42, + protein=False, + dayhoff=False, + hp=False, + dna=True, + num_hashes=500, + track_abundance=False, + scaled=0): self._objptr = lib.computeparams_new() self.seed = seed @@ -359,8 +374,15 @@ def from_manifest_row(cls, row): else: ksize = row['ksize'] * 3 - p = cls([ksize], DEFAULT_MMHASH_SEED, is_protein, is_dayhoff, is_hp, is_dna, - row['num'], row['with_abundance'], row['scaled']) + p = cls(ksizes=[ksize], + seed=DEFAULT_MMHASH_SEED, + protein=is_protein, + dayhoff=is_dayhoff, + hp=is_hp, + dna=is_dna, + num_hashes=row['num'], + track_abundance=row['with_abundance'], + scaled=row['scaled']) return p @@ -405,7 +427,7 @@ def to_param_str(self): return ",".join(pi) def __repr__(self): - return f"ComputeParameters({self.ksizes}, {self.seed}, {self.protein}, {self.dayhoff}, {self.hp}, {self.dna}, {self.num_hashes}, {self.track_abundance}, {self.scaled})" + return f"ComputeParameters(ksizes={self.ksizes}, seed={self.seed}, protein={self.protein}, dayhoff={self.dayhoff}, hp={self.hp}, dna={self.dna}, num_hashes={self.num_hashes}, track_abundance={self.track_abundance}, scaled={self.scaled})" def __eq__(self, other): return (self.ksizes == other.ksizes and diff --git a/src/sourmash/command_sketch.py b/src/sourmash/command_sketch.py index dd02dcb8e2..db8aa5a87f 100644 --- a/src/sourmash/command_sketch.py +++ b/src/sourmash/command_sketch.py @@ -146,16 +146,17 @@ def get_compute_params(self, *, split_ksizes=False): if self.mult_ksize_by_3 and not def_dna: ksizes = [ k*3 for k in ksizes ] - make_param = lambda ksizes: ComputeParameters(ksizes, - params_d.get('seed', def_seed), - def_protein, - def_dayhoff, - def_hp, - def_dna, - params_d.get('num', def_num), - params_d.get('track_abundance', - def_abund), - params_d.get('scaled', def_scaled)) + make_param = lambda ksizes: ComputeParameters( + ksizes=ksizes, + seed=params_d.get('seed', def_seed), + protein=def_protein, + dayhoff=def_dayhoff, + hp=def_hp, + dna=def_dna, + num_hashes=params_d.get('num', def_num), + track_abundance=params_d.get('track_abundance', + def_abund), + scaled=params_d.get('scaled', def_scaled)) if split_ksizes: for ksize in ksizes: diff --git a/src/sourmash/commands.py b/src/sourmash/commands.py index 5a4747615f..08ae99c771 100644 --- a/src/sourmash/commands.py +++ b/src/sourmash/commands.py @@ -501,12 +501,16 @@ def search(args): # do the actual search if query.minhash.track_abundance: - results = search_databases_with_abund_query(query, databases, - threshold=args.threshold, - do_containment=args.containment, - do_max_containment=args.max_containment, - best_only=args.best_only, - unload_data=True) + try: + results = search_databases_with_abund_query(query, databases, + threshold=args.threshold, + do_containment=args.containment, + do_max_containment=args.max_containment, + best_only=args.best_only, + unload_data=True) + except TypeError as exc: + error(f"ERROR: {str(exc)}") + sys.exit(-1) else: results = search_databases_with_flat_query(query, databases, threshold=args.threshold, diff --git a/src/sourmash/distance_utils.py b/src/sourmash/distance_utils.py deleted file mode 100644 index a80952d343..0000000000 --- a/src/sourmash/distance_utils.py +++ /dev/null @@ -1,286 +0,0 @@ -""" -Utility functions for jaccard/containment --> distance estimates -From https://github.com/KoslickiLab/mutation-rate-ci-calculator -https://doi.org/10.1101/2022.01.11.475870 -""" -from scipy.optimize import brentq -from scipy.stats import norm as scipy_norm -from scipy.special import hyp2f1 
-from numpy import sqrt -from math import log, exp - -from .logging import notify - - -#FROM mrcc.kmer_mutation_formulas_thm5 -def r1_to_q(k,r1): - r1 = float(r1) - q = 1-(1-r1)**k - return float(q) - - -def var_n_mutated(L,k,r1,q=None): - # there are computational issues in the variance formula that we solve here - # by the use of higher-precision arithmetic; the problem occurs when r is - # very small; for example, with L=10,k=2,r1=1e-6 standard precision - # gives varN<0 which is nonsense; by using the mpf type, we get the correct - # answer which is about 0.000038. - if (r1 == 0): return 0.0 - r1 = float(r1) - if (q == None): # we assume that if q is provided, it is correct for r1 - q = r1_to_q(k,r1) - varN = L*(1-q)*(q*(2*k+(2/r1)-1)-2*k) \ - + k*(k-1)*(1-q)**2 \ - + (2*(1-q)/(r1**2))*((1+(k-1)*(1-q))*r1-q) - if (varN<0.0): # this seems to happen only with super tiny test data - raise ValueError('Error: varN <0.0!') - return float(varN) - - -def exp_n_mutated(L,k,r1): - q = r1_to_q(k,r1) - return L*q - - -# FROM mrcc.third_moment_calculator -def exp_n_mutated_squared(L, k, p): - return var_n_mutated(L, k, p) + exp_n_mutated(L, k, p) ** 2 - - -def third_moment_nmut_exact(L,k,p): - t1 = (L * (-2 + 3*L) * p**2 + 3 * (1 - p)**(2*k) * (2 + (-1 + k - L) * p * (2 + k * p - L * p)) - (1 - p)**k * (6 + p * (-6 + L * (-6 + p + 6 * L * p))))/(p**2) - t2 = (-2 + 2 * k - L) * (-1 + 2 * k - L) * (2 * k - L) * (-1 + (1 - p)**k)**3 - t3 = (1/(p**3))*(-6 * (-1 + k)**2 * (k - L) * p**3 + 6 * (1 - p)**(3 * k) * (2 + (-2 + 2 * k - L) * p) + (1 - p)**(2 * k) * (-12 + 6 * (2 * k + L) * p + 6 * (4 * k**2 + 2 * (1 + L) - 3 * k * (2 + L)) * p**2 - (-1 + k) * k * (-2 + 4 * k - 3 * L) * p**3) + 6 * (-1 + k) * (1 - p)**k * p * (-2 + p * (2 - k + 2 * L + (k * (-2 + 3 * k - 3 * L) + L) * p))) - t4 = 6 * (-1 + (1 - p)**k) * ((k + k**2 - 2 * k * L + (-1 + L) * L) * (-1 + 2 * (1 - p)**k) * hyp2f1(1, 2 + k - L, k - L, 1) + (k + k**2 - 2 * k * L + (-1 + L) * L) * (1 - p)**k * (-1 + p) * hyp2f1(1, 2 + k - L, k - L, 1 - p) - (-2 * k + 4 * k**2 + L - 4 * k * L + L**2) * ((-1 + 2 * (1 - p)**k) * hyp2f1(1, 1 + 2 * k - L, -1 + 2 * k - L, 1)- (-1 + p)**(2 * k) * hyp2f1(1, 1 + 2 * k - L, -1 + 2 * k - L, 1 - p))) - return t1+t2+t3+t4 - - -def exp_n_mutated_cubed(L, k, p): - return third_moment_nmut_exact(L, k, p) - - -#from mrcc.p_from_scaled_containment -def probit(p): - return scipy_norm.ppf(p) - - -def sequence_len_to_n_kmers(sequence_len_bp, ksize): - n_kmers = sequence_len_bp - (ksize-1) - return n_kmers - - -def distance_to_identity(dist,d_low=None,d_high=None): - """ - ANI = 1-distance - """ - if not 0 <= dist <= 1: - raise ValueError(f"Error: distance value {dist} is not between 0 and 1!") - ident = 1-dist - result = ident - id_low,id_high=None,None - if d_low is not None and d_high is not None: - if (0<=d_low<=1) and (0<=d_high<=1): - id_high = 1-d_low - id_low = 1-d_high - result = [ident, id_low, id_high] - return result - - -def get_expected_log_probability(n_unique_kmers, ksize, mutation_rate, scaled_fraction): - '''helper function - Note that scaled here needs to be between 0 and 1 - (e.g. 
scaled 1000 --> scaled_fraction 0.001) - ''' - exp_nmut = exp_n_mutated(n_unique_kmers, ksize, mutation_rate) - try: - return (n_unique_kmers - exp_nmut) * log(1.0 - scaled_fraction) - except: - return float('-inf') - - -def get_exp_probability_nothing_common(mutation_rate, ksize, scaled, n_unique_kmers=None, sequence_len_bp=None): - """ - Given parameters, calculate the expected probability that nothing will be common - between a fracminhash sketch of a original sequence and a fracminhash sketch of a mutated - sequence. If this is above a threshold, we should suspect that the two sketches may have - nothing in common. The threshold needs to be set with proper insights. - - Arguments: n_unique_kmers, ksize, mutation_rate, scaled - Returns: float - expected likelihood that nothing is common between sketches - """ - # NTP note: do we have any checks for ksize >=1 in sourmash_args? The rest should be taken care of. - # assert 0.0 <= mutation_rate <= 1.0 and ksize >= 1 and scaled >= 1 - if sequence_len_bp and not n_unique_kmers: - n_unique_kmers = sequence_len_to_n_kmers(sequence_len_bp, ksize) - f_scaled = 1.0 / float(scaled) - if mutation_rate == 1.0: - return 1.0 - return exp( get_expected_log_probability(n_unique_kmers, ksize, mutation_rate, f_scaled) ) - - -def containment_to_distance(containment, ksize, scaled, n_unique_kmers=None, sequence_len_bp=None, confidence=0.95, return_identity=False, return_ci = False, prob_threshold = 10.0**(-3)): - """ - Containment --> distance CI (one step) - """ - sol1,sol2,point_estimate=None,None,None - if sequence_len_bp and not n_unique_kmers: - n_unique_kmers = sequence_len_to_n_kmers(sequence_len_bp, ksize) - if containment <= 0.0001: # changed from 0.0, to mirror jaccard_to_distance_CI_one_step - sol2 = sol1 = point_estimate = 1.0 - elif containment >= 0.9999: # changed from 1.0, to mirror jaccard_to_distance_CI_one_step - sol1 = sol2 = point_estimate = 0.0 - else: - point_estimate = 1.0-containment**(1.0/ksize) - if return_ci: - try: - alpha = 1 - confidence - z_alpha = probit(1-alpha/2) - f_scaled = 1.0/scaled # these use scaled as a fraction between 0 and 1 - - bias_factor = 1 - (1 - f_scaled) ** n_unique_kmers - - term_1 = (1.0-f_scaled) / (f_scaled * n_unique_kmers**3 * bias_factor**2) - term_2 = lambda pest: n_unique_kmers * exp_n_mutated(n_unique_kmers, ksize, pest) - exp_n_mutated_squared(n_unique_kmers, ksize, pest) - term_3 = lambda pest: var_n_mutated(n_unique_kmers, ksize, pest) / (n_unique_kmers**2) - - var_direct = lambda pest: term_1 * term_2(pest) + term_3(pest) - - f1 = lambda pest: (1-pest)**ksize + z_alpha * sqrt(var_direct(pest)) - containment - f2 = lambda pest: (1-pest)**ksize - z_alpha * sqrt(var_direct(pest)) - containment - - sol1 = brentq(f1, 0.0000001, 0.9999999) - sol2 = brentq(f2, 0.0000001, 0.9999999) - - except ValueError as exc: - # afaict, this only happens with extremely small test data - notify("WARNING: Cannot estimate ANI from containment. Do your sketches contain enough hashes?") - notify(str(exc)) - return None, None, None - - # Do this here, so that we don't need to reconvert distance <--> identity later. 
- prob_nothing_in_common = get_exp_probability_nothing_common(point_estimate, ksize, scaled, n_unique_kmers=n_unique_kmers) - if prob_nothing_in_common >= prob_threshold: - # keep count, suggest mod decrease scaled, maybe decrease ksize - notify('WARNING: These sketches may have no hashes in common based on chance alone.') - - if return_identity: - point_estimate,sol2,sol1 = distance_to_identity(point_estimate,sol2,sol1) - - if return_ci: - return point_estimate, sol2, sol1, prob_nothing_in_common - - return point_estimate, prob_nothing_in_common - - -#from from mrcc.p_from_scaled_jaccard -def variance_scaled_jaccard(L, p, k, s): - exp_n_mut = exp_n_mutated(L, k, p) - exp_n_mut_squared = exp_n_mutated_squared(L, k, p) - exp_n_mut_cubed = exp_n_mutated_cubed(L, k, p) - bias_factor = 1 - (1 - s) ** ( int(L + exp_n_mut) ) - - factor1 = (1-s)/(s * bias_factor**2) - factor2 = (2 * L * exp_n_mut - 2 * exp_n_mut_squared) / (L ** 3 + 3*L*exp_n_mut_squared + 3*L*L*exp_n_mut + exp_n_mut_cubed) - term1 = factor1 * factor2 - term2 = (L**2 - 2 * L * exp_n_mut + exp_n_mut_squared) / (L**2 + 2 * L * exp_n_mut + exp_n_mut_squared) - term3 = ((L - exp_n_mut) / (L + exp_n_mut))**2 - - return term1 + term2 - term3 - - -def jaccard_to_distance_orig(jaccard, ksize, scaled, n_unique_kmers=None, sequence_len_bp=None, confidence=0.95, return_identity=False, return_ci=False): - """ - Scaled Jaccard to distance estimate and CI (one step) - """ - if sequence_len_bp and not n_unique_kmers: - n_unique_kmers = sequence_len_to_n_kmers(sequence_len_bp, ksize) - if jaccard <= 0.0001: - sol2 = sol1 = point_estimate = 1.0 - elif jaccard >= 0.9999: - sol1 = sol2 = point_estimate = 0.0 - else: - point_estimate = 1.0 - (2.0 - 2.0/(jaccard + 1))**(1.0/ksize) - if return_ci: - try: - alpha = 1 - confidence - z_alpha = probit(1-alpha/2) - f_scaled = 1.0/scaled # these use scaled as a fraction between 0 and 1 - - var_direct = lambda pest: variance_scaled_jaccard(n_unique_kmers, pest, ksize, f_scaled) - - f1 = lambda pest: 2.0/(2- (1-pest)**ksize ) - 1 + z_alpha * sqrt(var_direct(pest)) - jaccard - f2 = lambda pest: 2.0/(2- (1-pest)**ksize ) - 1 - z_alpha * sqrt(var_direct(pest)) - jaccard - - sol1 = brentq(f1, 0.0000001, 0.9999999) - sol2 = brentq(f2, 0.0000001, 0.9999999) - - except ValueError as exc: - # afaict, this only happens with extremely small test data - notify("WARNING: Cannot estimate ANI from jaccard. Do your sketches contain enough hashes?") - notify(str(exc)) - return None, None, None - - # Do this here, so that we don't need to reconvert distance <--> identity later. - prob_nothing_in_common = get_exp_probability_nothing_common(point_estimate, ksize, scaled, n_unique_kmers=n_unique_kmers) - if prob_nothing_in_common >= prob_threshold: - notify('WARNING: These sketches may have no hashes in common based on chance alone.') - - if return_identity: - point_estimate,sol2,sol1 = distance_to_identity(point_estimate,sol2,sol1) - - if return_ci: - return point_estimate, sol2, sol1, prob_nothing_in_common - - return point_estimate, prob_nothing_in_common - - -def jaccard_to_distance(jaccard, ksize, scaled, n_unique_kmers=None, sequence_len_bp=None, return_identity=False, prob_threshold = 10.0**(-3)): - """ - Given parameters, calculate point estimate for mutation rate from jaccard index. - First checks if parameters are valid (checks are not exhaustive). Then uses formulas - derived mathematically to compute the point estimate. The formula uses approximations, - therefore a tiny error is associated with it. 
A lower bound of that error is also returned. - A high error indicates that the point estimate cannot be trusted. Threshold of the error - is open to interpretation, but suggested that > 10^-4 should be handled with caution. - - Note that the error is NOT a mutation rate, and therefore cannot be considered in - something like mut.rate +/- error. - - Arguments: jaccard, ksize, scaled, n_unique_kmers - # Returns: tuple (point_estimate_of_mutation_rate, lower_bound_of_error) - - # Returns: point_estimate_of_mutation_rate - """ - # NTP question: does this equation consider variance of jaccard due to scaled? - # Or was that only used for estimating the CI around the pt estimate? - error_lower_bound = None - if sequence_len_bp and not n_unique_kmers: - n_unique_kmers = sequence_len_to_n_kmers(sequence_len_bp, ksize) - if jaccard <= 0.0001: - point_estimate = 1.0 - elif jaccard >= 0.9999: - point_estimate = 0.0 - else: - point_estimate = 1.0 - ( 2.0 * jaccard / float(1+jaccard) ) ** (1.0/float(ksize)) - - exp_n_mut = exp_n_mutated(n_unique_kmers, ksize, point_estimate) - var_n_mut = var_n_mutated(n_unique_kmers, ksize, point_estimate) - error_lower_bound = 1.0 * n_unique_kmers * var_n_mut / (n_unique_kmers + exp_n_mut)**3 - - if error_lower_bound is not None and error_lower_bound > 10.0**(-4.0): # does this need to be modifiable? - #print(f"err: ({error_lower_bound})") - notify(f"WARNING: Error on Jaccard distance point estimate is too high ({error_lower_bound}).") - # @NTP: ruturn ANI estimate anyway, NA later instead? Probably want this value for `sig overlap, at least?` - notify(f"Returning 'NA' for this comparison and continuing.") - point_estimate = "NA" - - # get probability nothing in common - prob_nothing_in_common = get_exp_probability_nothing_common(point_estimate, ksize, scaled, n_unique_kmers=n_unique_kmers) - if prob_nothing_in_common >= prob_threshold: - notify('WARNING: These sketches may have no hashes in common based on chance alone.') - - if return_identity: - point_estimate = distance_to_identity(point_estimate) - - return point_estimate, prob_nothing_in_common, error_lower_bound - diff --git a/src/sourmash/index/__init__.py b/src/sourmash/index/__init__.py index 60f1940c36..5fa521db66 100644 --- a/src/sourmash/index/__init__.py +++ b/src/sourmash/index/__init__.py @@ -554,7 +554,7 @@ def _load_manifest(self): "Load a manifest if one exists" try: manifest_data = self.storage.load('SOURMASH-MANIFEST.csv') - except KeyError: + except (KeyError, FileNotFoundError): self.manifest = None else: debug_literal(f'found manifest on load for {self.storage.path}') @@ -616,18 +616,16 @@ def _signatures_with_internal(self): Note: does not limit signatures to subsets. """ - zf = self.storage.zipfile - # list all the files, without using the Storage interface; currently, # 'Storage' does not provide a way to list all the files, so :shrug:. - for zipinfo in zf.infolist(): + for filename in self.storage._filenames(): # should we load this file? if it ends in .sig OR we are forcing: - if zipinfo.filename.endswith('.sig') or \ - zipinfo.filename.endswith('.sig.gz') or \ + if filename.endswith('.sig') or \ + filename.endswith('.sig.gz') or \ self.traverse_yield_all: - fp = zf.open(zipinfo) - for ss in load_signatures(fp): - yield ss, zipinfo.filename + sig_data = self.storage.load(filename) + for ss in load_signatures(sig_data): + yield ss, filename def signatures(self): "Load all signatures in the zip file." 
@@ -652,10 +650,10 @@ def signatures(self): # if no manifest here, break Storage class encapsulation # and go for all the files. (This is necessary to support # ad-hoc zipfiles that have no manifests.) - for zipinfo in storage.zipfile.infolist(): + for filename in storage._filenames(): # should we load this file? if it ends in .sig OR force: - if zipinfo.filename.endswith('.sig') or \ - zipinfo.filename.endswith('.sig.gz') or \ + if filename.endswith('.sig') or \ + filename.endswith('.sig.gz') or \ self.traverse_yield_all: if selection_dict: select = lambda x: select_signature(x, @@ -663,7 +661,7 @@ def signatures(self): else: select = lambda x: True - data = self.storage.load(zipinfo.filename) + data = self.storage.load(filename) for ss in load_signatures(data): if select(ss): yield ss diff --git a/src/sourmash/lca/lca_db.py b/src/sourmash/lca/lca_db.py index fb9119def4..8f88d0c11f 100644 --- a/src/sourmash/lca/lca_db.py +++ b/src/sourmash/lca/lca_db.py @@ -510,10 +510,13 @@ def find(self, search_fn, query, **kwargs): score = search_fn.score_fn(query_size, shared_size, subj_size, total_size) - # note to self: even with JaccardSearchBestOnly, this will + # CTB note to self: even with JaccardSearchBestOnly, this will # still iterate over & score all signatures. We should come # up with a protocol by which the JaccardSearch object can # signal that it is done, or something. + # For example, see test_lca_jaccard_ordering, where + # for containment we could be done early, but for Jaccard we + # cannot. if search_fn.passes(score): if search_fn.collect(score, subj): if passes_all_picklists(subj, self.picklists): diff --git a/src/sourmash/minhash.py b/src/sourmash/minhash.py index 5e5a14c4cf..d2323ae8ab 100644 --- a/src/sourmash/minhash.py +++ b/src/sourmash/minhash.py @@ -647,7 +647,7 @@ def jaccard(self, other, downsample=False): raise TypeError(err) return self._methodcall(lib.kmerminhash_similarity, other._get_objptr(), True, downsample) - def jaccard_ani(self, other, downsample=False, jaccard=None, confidence=0.95): + def jaccard_ani(self, other, *, downsample=False, jaccard=None, prob_threshold=1e-3, err_threshold=1e-4): "Calculate Jaccard --> ANI of two MinHash objects." self_mh = self other_mh = other @@ -659,11 +659,12 @@ def jaccard_ani(self, other, downsample=False, jaccard=None, confidence=0.95): if jaccard is None: jaccard = self_mh.similarity(other_mh, ignore_abundance=True) avg_scaled_kmers = round((len(self_mh) + len(other_mh))/2) - avg_n_kmers = avg_scaled_kmers * scaled # would be better if hll estimate - j_ani,ani_low,ani_high = jaccard_to_distance(jaccard, self_mh.ksize, scaled, - n_unique_kmers=avg_n_kmers, confidence=confidence, - return_identity=True) - return j_ani, ani_low, ani_high + avg_n_kmers = avg_scaled_kmers * scaled # would be better if hll estimate - see #1798 + j_aniresult = jaccard_to_distance(jaccard, self_mh.ksize, scaled, + n_unique_kmers=avg_n_kmers, + prob_threshold = prob_threshold, + err_threshold = err_threshold) + return j_aniresult def similarity(self, other, ignore_abundance=False, downsample=False): """Calculate similarity of two sketches. @@ -702,7 +703,7 @@ def contained_by(self, other, downsample=False): return self.count_common(other, downsample) / len(self) - def containment_ani(self, other, downsample=False, containment=None, confidence=0.95): + def containment_ani(self, other, *, downsample=False, containment=None, confidence=0.95, estimate_ci = False): "Estimate ANI from containment with the other MinHash." 
self_mh = self other_mh = other @@ -713,11 +714,13 @@ def containment_ani(self, other, downsample=False, containment=None, confidence= other_mh = other.downsample(scaled=scaled) if containment is None: containment = self_mh.contained_by(other_mh) - n_kmers = len(self_mh) * scaled # would be better if hll estimate - c_ani,ani_low,ani_high = containment_to_distance(containment, self_mh.ksize, self_mh.scaled, + n_kmers = len(self_mh) * scaled # would be better if hll estimate - see #1798 + + c_aniresult = containment_to_distance(containment, self_mh.ksize, self_mh.scaled, n_unique_kmers=n_kmers, confidence=confidence, - return_identity=True) - return c_ani, ani_low, ani_high + estimate_ci = estimate_ci) + return c_aniresult + def max_containment(self, other, downsample=False): """ @@ -731,7 +734,7 @@ def max_containment(self, other, downsample=False): return self.count_common(other, downsample) / min_denom - def max_containment_ani(self, other, downsample=False, max_containment=None, confidence=0.95): + def max_containment_ani(self, other, *, downsample=False, max_containment=None, confidence=0.95, estimate_ci=False): "Estimate ANI from containment with the other MinHash." self_mh = self other_mh = other @@ -743,11 +746,12 @@ def max_containment_ani(self, other, downsample=False, max_containment=None, con if max_containment is None: max_containment = self_mh.max_containment(other_mh) min_n_kmers = min(len(self_mh), len(other_mh)) - n_kmers = min_n_kmers * scaled - c_ani,ani_low,ani_high = containment_to_distance(max_containment, self_mh.ksize, scaled, - n_unique_kmers=n_kmers,confidence=confidence, - return_identity=True) - return c_ani, ani_low, ani_high + n_kmers = min_n_kmers * scaled # would be better if hll estimate - see #1798 + + c_aniresult = containment_to_distance(max_containment, self_mh.ksize, scaled, + n_unique_kmers=n_kmers,confidence=confidence, + estimate_ci = estimate_ci) + return c_aniresult def __add__(self, other): if not isinstance(other, MinHash): diff --git a/src/sourmash/picklist.py b/src/sourmash/picklist.py index fc306782b6..f48377e4cb 100644 --- a/src/sourmash/picklist.py +++ b/src/sourmash/picklist.py @@ -1,5 +1,6 @@ "Picklist code for extracting subsets of signatures." import csv +import os from enum import Enum # set up preprocessing functions for column stuff @@ -143,18 +144,23 @@ def load(self, pickfile, column_name): "load pickset, return num empty vals, and set of duplicate vals." 
pickset = self.init() + if not os.path.exists(pickfile) or not os.path.isfile(pickfile): + raise ValueError(f"pickfile '{pickfile}' must exist and be a regular file") + n_empty_val = 0 dup_vals = set() with open(pickfile, newline='') as csvfile: x = csvfile.readline() # skip leading comment line in case there's a manifest header - if x[0] == '#': + if not x or x[0] == '#': pass else: csvfile.seek(0) r = csv.DictReader(csvfile) + if not r.fieldnames: + raise ValueError(f"empty or improperly formatted pickfile '{pickfile}'") if column_name not in r.fieldnames: raise ValueError(f"column '{column_name}' not in pickfile '{pickfile}'") diff --git a/src/sourmash/sbt.py b/src/sourmash/sbt.py index d974c23c91..bb001fd940 100644 --- a/src/sourmash/sbt.py +++ b/src/sourmash/sbt.py @@ -625,7 +625,7 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False): kind = "Zip" if not path.endswith('.sbt.zip'): path += '.sbt.zip' - storage = ZipStorage(path) + storage = ZipStorage(path, mode="w") backend = "FSStorage" assert path[-8:] == '.sbt.zip' @@ -1440,6 +1440,8 @@ def convert_cmd(name, backend): backend = options.pop(0) backend = backend.lower().strip("'") + kwargs = {} + if options: print(options) options = options[0].split(')') @@ -1454,6 +1456,7 @@ def convert_cmd(name, backend): backend = RedisStorage elif backend.lower() in ('zip', 'zipstorage'): backend = ZipStorage + kwargs['mode'] = 'w' elif backend.lower() in ('fs', 'fsstorage'): backend = FSStorage if options: @@ -1469,6 +1472,6 @@ def convert_cmd(name, backend): else: error('backend not recognized: {}'.format(backend)) - with backend(*options) as storage: + with backend(*options, **kwargs) as storage: sbt = SBT.load(name, leaf_loader=SigLeaf.load) sbt.save(name, storage=storage) diff --git a/src/sourmash/sbt_storage.py b/src/sourmash/sbt_storage.py index 695832fa18..398e11c877 100644 --- a/src/sourmash/sbt_storage.py +++ b/src/sourmash/sbt_storage.py @@ -9,6 +9,11 @@ from abc import ABC from pathlib import Path +from ._lowlevel import ffi, lib +from .utils import RustObject, rustcall, decode_str +from .minhash import to_bytes + + class Storage(ABC): @abc.abstractmethod @@ -89,7 +94,110 @@ def load(self, path): return path.read_bytes() -class ZipStorage(Storage): +class ZipStorage(RustObject, Storage): + + __dealloc_func__ = lib.zipstorage_free + + def __init__(self, path, *, mode="r"): + if mode == "w": + self.__inner = _RwZipStorage(path) + else: + self.__inner = None + path = os.path.abspath(path) + self._objptr = rustcall(lib.zipstorage_new, to_bytes(path), len(path)) + + @staticmethod + def can_open(location): + return zipfile.is_zipfile(location) + + @property + def path(self): + if self.__inner: + return self.__inner.path + return decode_str(self._methodcall(lib.zipstorage_path)) + + @property + def subdir(self): + if self.__inner: + return self.__inner.subdir + return decode_str(self._methodcall(lib.zipstorage_subdir)) + + @subdir.setter + def subdir(self, value): + if self.__inner: + self.__inner.subdir = value + else: + self._methodcall(lib.zipstorage_set_subdir, to_bytes(value), len(value)) + + def _filenames(self): + if self.__inner: + return self.__inner._filenames() + + size = ffi.new("uintptr_t *") + paths_ptr = self._methodcall(lib.zipstorage_filenames, size) + size = size[0] + + paths = [] + for i in range(size): + path = decode_str(paths_ptr[i][0]) + paths.append(path) + + return paths + + def save(self, path, content, *, overwrite=False, compress=False): + if self.__inner: + return self.__inner.save(path, 
diff --git a/src/sourmash/sbt_storage.py b/src/sourmash/sbt_storage.py
index 695832fa18..398e11c877 100644
--- a/src/sourmash/sbt_storage.py
+++ b/src/sourmash/sbt_storage.py
@@ -9,6 +9,11 @@ from abc import ABC
 from pathlib import Path
 
+from ._lowlevel import ffi, lib
+from .utils import RustObject, rustcall, decode_str
+from .minhash import to_bytes
+
+
 class Storage(ABC):
 
     @abc.abstractmethod
@@ -89,7 +94,106 @@ def load(self, path):
         return path.read_bytes()
 
-class ZipStorage(Storage):
+class ZipStorage(RustObject, Storage):
+
+    __dealloc_func__ = lib.zipstorage_free
+
+    def __init__(self, path, *, mode="r"):
+        if mode == "w":
+            self.__inner = _RwZipStorage(path)
+        else:
+            self.__inner = None
+            path = os.path.abspath(path)
+            self._objptr = rustcall(lib.zipstorage_new, to_bytes(path), len(path))
+
+    @staticmethod
+    def can_open(location):
+        return zipfile.is_zipfile(location)
+
+    @property
+    def path(self):
+        if self.__inner:
+            return self.__inner.path
+        return decode_str(self._methodcall(lib.zipstorage_path))
+
+    @property
+    def subdir(self):
+        if self.__inner:
+            return self.__inner.subdir
+        return decode_str(self._methodcall(lib.zipstorage_subdir))
+
+    @subdir.setter
+    def subdir(self, value):
+        if self.__inner:
+            self.__inner.subdir = value
+        else:
+            self._methodcall(lib.zipstorage_set_subdir, to_bytes(value), len(value))
+
+    def _filenames(self):
+        if self.__inner:
+            return self.__inner._filenames()
+
+        size = ffi.new("uintptr_t *")
+        paths_ptr = self._methodcall(lib.zipstorage_filenames, size)
+        size = size[0]
+
+        paths = []
+        for i in range(size):
+            path = decode_str(paths_ptr[i][0])
+            paths.append(path)
+
+        return paths
+
+    def save(self, path, content, *, overwrite=False, compress=False):
+        if self.__inner:
+            return self.__inner.save(path, content, overwrite=overwrite, compress=compress)
+        raise NotImplementedError()
+
+    def load(self, path):
+        if self.__inner:
+            return self.__inner.load(path)
+
+        try:
+            size = ffi.new("uintptr_t *")
+            rawbuf = self._methodcall(lib.zipstorage_load, to_bytes(path), len(path), size)
+            size = size[0]
+
+            rawbuf = ffi.gc(rawbuf, lambda o: lib.nodegraph_buffer_free(o, size), size)
+            buf = ffi.buffer(rawbuf, size)
+
+            # TODO: maybe avoid the [:] here, it triggers a copy...
+            return buf[:]
+        except ValueError:
+            raise FileNotFoundError(path)
+
+    def list_sbts(self):
+        if self.__inner:
+            return self.__inner.list_sbts()
+
+        size = ffi.new("uintptr_t *")
+        paths_ptr = self._methodcall(lib.zipstorage_list_sbts, size)
+        size = size[0]
+
+        paths = []
+        for i in range(size):
+            path = decode_str(paths_ptr[i][0])
+            paths.append(path)
+
+        return paths
+
+    def init_args(self):
+        return {'path': self.path}
+
+    def flush(self):
+        if self.__inner:
+            self.__inner.flush()
+
+    def close(self):
+        if self.__inner:
+            self.__inner.close()
+
+
+class _RwZipStorage(Storage):
 
     def __init__(self, path):
         self.path = os.path.abspath(path)
@@ -119,6 +227,9 @@ def __init__(self, path):
         if len(subdirs) == 1:
             self.subdir = subdirs[0]
 
+    def _filenames(self):
+        return [info.filename for info in self.zipfile.infolist()]
+
     def _content_matches(self, zf, path, content):
         info = zf.getinfo(path)
         entry_content = zf.read(info)
@@ -209,9 +320,6 @@ def load(self, path):
         else:
             raise FileNotFoundError(path)
 
-    def init_args(self):
-        return {'path': self.path}
-
     def close(self):
         # TODO: this is not ideal; checking for zipfile.fp is looking at
         # internal implementation details from CPython...
@@ -285,10 +393,6 @@ def flush(self, *, keep_closed=False):
             self.bufferzip.close()
             self.bufferzip = None
 
-    @staticmethod
-    def can_open(location):
-        return zipfile.is_zipfile(location)
-
     def list_sbts(self):
         return [f for f in self.zipfile.namelist() if f.endswith(".sbt.json")]
diff --git a/src/sourmash/sourmash_args.py b/src/sourmash/sourmash_args.py
index dc001013e3..60cba7f43f 100644
--- a/src/sourmash/sourmash_args.py
+++ b/src/sourmash/sourmash_args.py
@@ -126,15 +126,15 @@ def load_picklist(args):
     if args.picklist:
         try:
             picklist = SignaturePicklist.from_picklist_args(args.picklist)
+
+            notify(f"picking column '{picklist.column_name}' of type '{picklist.coltype}' from '{picklist.pickfile}'")
+
+            n_empty_val, dup_vals = picklist.load(picklist.pickfile, picklist.column_name)
         except ValueError as exc:
             error("ERROR: could not load picklist.")
             error(str(exc))
             sys.exit(-1)
 
-        notify(f"picking column '{picklist.column_name}' of type '{picklist.coltype}' from '{picklist.pickfile}'")
-
-        n_empty_val, dup_vals = picklist.load(picklist.pickfile, picklist.column_name)
-
         notify(f"loaded {len(picklist.pickset)} distinct values into picklist.")
         if n_empty_val:
             notify(f"WARNING: {n_empty_val} empty values in column '{picklist.column_name}' in picklist file")
@@ -966,7 +966,7 @@ def open(self):
         if os.path.exists(self.location):
             do_create = False
 
-        storage = ZipStorage(self.location)
+        storage = ZipStorage(self.location, mode="w")
         if not storage.subdir:
             storage.subdir = 'signatures'
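# --- Behavior sketch (illustrative, not part of the patch) ---
# With the checks added in picklist.py, a missing, non-regular, or empty
# pickfile now surfaces as a ValueError from load(), and load_picklist()
# above catches it inside the try block so the CLI reports a clean error
# instead of a traceback. 'pick.csv' is a hypothetical path; the argument
# string follows the pickfile:colname:coltype form used by --picklist.
from sourmash.picklist import SignaturePicklist

picklist = SignaturePicklist.from_picklist_args("pick.csv:md5full:md5")
try:
    n_empty_val, dup_vals = picklist.load(picklist.pickfile, picklist.column_name)
except ValueError as exc:
    print("could not load picklist:", exc)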
diff --git a/tests/test_cmd_signature.py b/tests/test_cmd_signature.py
index b09d05076b..cd93349f7e 100644
--- a/tests/test_cmd_signature.py
+++ b/tests/test_cmd_signature.py
@@ -1627,6 +1627,45 @@ def test_sig_extract_7_no_ksize(c):
     assert len(siglist) == 3
 
 
+def test_sig_extract_8_empty_picklist_fail(runtmp):
+    # what happens with an empty picklist?
+    sig47 = utils.get_test_data('47.fa.sig')
+    sig63 = utils.get_test_data('63.fa.sig')
+
+    # make empty picklist
+    picklist_csv = runtmp.output('pick.csv')
+    with open(picklist_csv, 'w', newline='') as csvfp:
+        pass
+
+    picklist_arg = f"{picklist_csv}:md5full:md5"
+
+    with pytest.raises(SourmashCommandFailed):
+        runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg)
+
+    err = runtmp.last_result.err
+    print(err)
+
+    assert "empty or improperly formatted pickfile" in err
+
+
+def test_sig_extract_8_nofile_picklist_fail(runtmp):
+    # what happens when picklist file does not exist?
+    sig47 = utils.get_test_data('47.fa.sig')
+    sig63 = utils.get_test_data('63.fa.sig')
+
+    # picklist file does not exist
+    picklist_csv = runtmp.output('pick.csv')
+    picklist_arg = f"{picklist_csv}:md5full:md5"
+
+    with pytest.raises(SourmashCommandFailed):
+        runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg)
+
+    err = runtmp.last_result.err
+    print(err)
+
+    assert "must exist and be a regular file" in err
+
+
 def test_sig_extract_8_picklist_md5(runtmp):
     # extract 47 from 47, using a picklist w/full md5
     sig47 = utils.get_test_data('47.fa.sig')
diff --git a/tests/test_distance_utils.py b/tests/test_distance_utils.py
index c2ed4613c1..ebc4dd56d8 100644
--- a/tests/test_distance_utils.py
+++ b/tests/test_distance_utils.py
@@ -1,37 +1,70 @@
 """
 Tests for distance utils.
 """
-
 import pytest
-from sourmash.distance_utils import containment_to_distance, jaccard_to_distance, distance_to_identity, sequence_len_to_n_kmers, jaccard_to_distance_point_estimate
+from sourmash.distance_utils import (containment_to_distance, get_exp_probability_nothing_common,
+                                     handle_seqlen_nkmers, jaccard_to_distance,
+                                     ANIResult, ciANIResult, jaccardANIResult, var_n_mutated)
+
+def test_aniresult():
+    res = ANIResult(0.4, 0.1)
+    assert res.dist == 0.4
+    assert res.ani == 0.6
+    assert res.p_nothing_in_common == 0.1
+    assert res.p_exceeds_threshold == True
+    # check that they're equivalent
+    res2 = ANIResult(0.4, 0.1)
+    assert res == res2
+    res3 = ANIResult(0.5, 0)
+    assert res != res3
+    assert res3.p_exceeds_threshold == False
+
+def test_aniresult_bad_distance():
+    """
+    Fail if distance is not between 0 and 1.
+    """
+    with pytest.raises(Exception) as exc:
+        ANIResult(1.1, 0.1)
+    print("\n", str(exc.value))
+    assert "distance value 1.1000 is not between 0 and 1!" in str(exc.value)
+    with pytest.raises(Exception) as exc:
+        ANIResult(-0.1, 0.1)
+    print("\n", str(exc.value))
+    assert "distance value -0.1000 is not between 0 and 1!" in str(exc.value)
 
-def test_distance_to_identity():
-    ident,id_low,id_high = distance_to_identity(0.5,0.4,0.6)
-    assert ident == 0.5
-    assert id_low == 0.4
-    assert id_high ==0.6
 
+def test_jaccard_aniresult():
+    res = jaccardANIResult(0.4, 0.1, jaccard_error=0.03)
+    assert res.dist == 0.4
+    assert res.ani == 0.6
+    assert res.p_nothing_in_common == 0.1
+    assert res.jaccard_error == 0.03
+    assert res.p_exceeds_threshold == True
+    assert res.je_exceeds_threshold == True
+    res2 = jaccardANIResult(0.4, 0.1, jaccard_error=0.03, je_threshold=0.1)
+    assert res2.je_exceeds_threshold == False
 
-def test_distance_to_identity_just_point_estimate():
-    ident = distance_to_identity(0.4)
-    assert ident == 0.6
 
+def test_jaccard_aniresult_nojaccarderror():
+    # jaccard error is None
+    with pytest.raises(Exception) as exc:
+        jaccardANIResult(0.4, 0.1, None)
+    print("\n", str(exc.value))
+    assert "Error: jaccard_error cannot be None." in str(exc.value)
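# --- Relationship sketch (illustrative, not part of the patch) ---
# As the assertions above show, a result's ani is one minus its dist, and
# p_exceeds_threshold appears to compare p_nothing_in_common against a
# probability threshold that defaults to 0.001 (made explicit in the
# ciANIResult(..., p_threshold=0.001) comparison further down).
from sourmash.distance_utils import ANIResult

res = ANIResult(0.4, 0.1)        # dist=0.4, p_nothing_in_common=0.1
assert res.ani == 1 - res.dist   # 0.6
assert res.p_exceeds_threshold   # since 0.1 > 0.001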
 
-def test_distance_to_identity_just_point_estimate_extra_input():
-    """
-    Ignore 2nd input, unless put both dist_low and dist_high
-    """
-    ident = distance_to_identity(0.4,0.5)
-    assert ident == 0.6
-
-def test_distance_to_identity_fail():
-    with pytest.raises(Exception) as exc:
-        ident,id_low,id_high = distance_to_identity(1.1,0.4,0.6)
-    assert "distance value 1.1 is not between 0 and 1!" in str(exc.value)
-    with pytest.raises(Exception) as exc:
-        ident,id_low,id_high = distance_to_identity(-0.1,0.4,0.6)
-    assert "distance value -0.1 is not between 0 and 1!" in str(exc.value)
 
+def test_ci_aniresult():
+    res = ciANIResult(0.4, 0.1, dist_low=0.3, dist_high=0.5)
+    print(res)
+    assert res.dist == 0.4
+    assert res.ani == 0.6
+    assert res.p_nothing_in_common == 0.1
+    assert res.ani_low == 0.5
+    assert res.ani_high == 0.7
+    res2 = ciANIResult(0.4, 0.1, dist_low=0.3, dist_high=0.5)
+    assert res == res2
+    res3 = ciANIResult(0.4, 0.2, dist_low=0.3, dist_high=0.5)
+    assert res != res3
 
 
 def test_containment_to_distance_zero():
@@ -39,20 +72,23 @@
     scaled = 1
     nkmers = 10000
     ksize=21
-    dist,low,high = containment_to_distance(contain,ksize,scaled, n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
+    res = containment_to_distance(contain,ksize,scaled, n_unique_kmers=nkmers, estimate_ci=True)
+    print(res)
     # check results
-    exp_dist, exp_low,exp_high = 1.0,1.0,1.0
-    assert dist == exp_dist
-    assert low == exp_low
-    assert high == exp_high
-    # return identity instead
-    exp_id, exp_idlow,exp_idhigh = 0.0,0.0,0.0
-    ident,low,high = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,return_identity=True)
-    assert ident == exp_id
-    assert low == exp_idlow
-    assert high == exp_idhigh
+    exp_dist,exp_low,exp_high,pnc = 1.0,None,None,1.0
+    exp_id, exp_idlow,exp_idhigh,pnc = 0.0,None,None,1.0
+    assert res.dist == exp_dist
+    assert res.dist_low == exp_low
+    assert res.dist_high == exp_high
+    assert res.p_nothing_in_common == pnc
+    assert res.ani == exp_id
+    assert res.ani_low == exp_idlow
+    assert res.ani_high == exp_idhigh
+    # check without returning ci
+    res2 = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers)
+    print(res2)
+    exp_res = ciANIResult(dist=1.0, p_nothing_in_common=1.0, p_threshold=0.001)
+    assert res2 == exp_res
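# --- Numeric sketch (illustrative, not part of the patch) ---
# The point estimates checked in these tests are consistent with
# dist = 1 - containment ** (1/ksize), i.e. ani = containment ** (1/ksize).
# Worked example matching test_containment_to_distance_scaled1 below
# (containment=0.5, ksize=21):
contain, ksize = 0.5, 21
dist = 1 - contain ** (1 / ksize)
print(dist)        # ~0.032468221476108394
print(1 - dist)    # ~0.9675317785238916, the expected ani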
 
 
 def test_containment_to_distance_one():
@@ -60,20 +96,25 @@
     scaled = 1
     nkmers = 10000
     ksize=21
-    dist,low,high = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    # check results
-    exp_dist, exp_low,exp_high = 0.0,0.0,0.0
-    assert dist == exp_dist
-    assert low == exp_low
-    assert high == exp_high
-    # return identity instead
-    exp_id, exp_idlow,exp_idhigh = 1.0,1.0,1.0
-    ident,low,high = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,return_identity=True)
-    assert ident == exp_id
-    assert low == exp_idlow
-    assert high == exp_idhigh
+    res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,estimate_ci=True)
+    print(res)
+    exp_dist, exp_low,exp_high,pnc = 0.0,None,None,0.0
+    exp_id, exp_idlow,exp_idhigh,pnc = 1.0,None,None,0.0
+    assert res.dist == exp_dist
+    assert res.dist_low == exp_low
+    assert res.dist_high == exp_high
+    assert res.p_nothing_in_common == pnc
+    assert res.ani == exp_id
+    assert res.ani_low == exp_idlow
+    assert res.ani_high == exp_idhigh
+
+    # check without returning ci
+    res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers)
+    assert res.dist == exp_dist
+    assert res.ani == exp_id
+    assert res.p_nothing_in_common == pnc
+    assert res.ani_low == None
+    assert res.ani_high == None
 
 
 def test_containment_to_distance_scaled1():
@@ -81,70 +122,64 @@
     scaled = 1
     nkmers = 10000
     ksize=21
-    dist,low,high = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"{dist},{low},{high}")
+    res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,estimate_ci=True)
+    print(res)
     # check results
-    exp_dist, exp_low,exp_high = 0.032468221476108394,0.028709912966405623,0.03647860197289783
-    assert dist == exp_dist
-    assert low == exp_low
-    assert high == exp_high
-    # return identity instead
-    exp_id, exp_idlow,exp_idhigh = 0.9675317785238916,0.9635213980271021,0.9712900870335944
-    ident,low,high = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,return_identity=True)
-    print(f"{ident},{low},{high}")
-    assert ident == exp_id
-    assert low == exp_idlow
-    assert high == exp_idhigh
-
-
-def test_containment_to_distance_2():
+    assert res.dist == 0.032468221476108394
+    assert res.ani == 0.9675317785238916
+    assert res.dist_low == 0.028709912966405623
+    assert res.ani_high == 0.9712900870335944
+    assert res.dist_high == 0.03647860197289783
+    assert res.ani_low == 0.9635213980271021
+    assert res.p_nothing_in_common == 0.0
+    # without returning ci
+    res2 = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers)
+    assert (res2.dist,res2.ani,res2.p_nothing_in_common) == (0.032468221476108394, 0.9675317785238916, 0.0)
+    assert (res2.dist,res2.ani,res2.p_nothing_in_common) == (res.dist, res.ani, res.p_nothing_in_common)
+
+
+def test_containment_to_distance_scaled100():
     contain = 0.1
     scaled = 100
     nkmers = 10000
     ksize=31
-    dist,low,high = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"{dist},{low},{high}")
+    res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,estimate_ci=True)
+    print(res)
     # check results
-    exp_dist, exp_low,exp_high = 0.07158545548052564,0.05320779238601372,0.09055547672455365
-    assert dist == exp_dist
-    assert low == exp_low
-    assert high == exp_high
+    assert res.dist == 0.07158545548052564
+    assert res.dist_low == 0.05320779238601372
+    assert res.dist_high == 0.09055547672455365
+    assert res.p_nothing_in_common == 4.3171247410658655e-05
+    assert res.p_exceeds_threshold == False
 
 
-def test_containment_to_distance_scaled100():
+def test_containment_to_distance_scaled100_2():
     contain = 0.5
     scaled = 100
     nkmers = 10000
     ksize=21
-    dist,low,high = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"{dist},{low},{high}")
+    res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,estimate_ci=True)
+    print(res)
     # check results
-    exp_dist, exp_low,exp_high = 0.032468221476108394,0.023712063916639017,0.04309960543965866
-    assert dist == exp_dist
-    assert low == exp_low
-    assert high == exp_high
+    assert res.dist == 0.032468221476108394
+    assert res.dist_low == 0.023712063916639017
+    assert res.dist_high == 0.04309960543965866
+    assert res.p_exceeds_threshold == False
 
 
 def test_containment_to_distance_k10():
-    jaccard = 0.5
+    contain = 0.5
     scaled = 100
     nkmers = 10000
     ksize=10
-    dist,low,high = containment_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"{dist},{low},{high}")
+    res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,estimate_ci=True)
+    print(res)
     # check results
-    exp_dist, exp_low,exp_high = 0.06696700846319259,0.04982777541057476,0.08745108232411622
-    assert dist == exp_dist
-    assert low == exp_low
-    assert high == exp_high
+    assert res.dist == 0.06696700846319259
+    assert res.dist_low == 0.04982777541057476
+    assert res.dist_high == 0.08745108232411622
+    assert res.p_exceeds_threshold == False
+
 
 def test_containment_to_distance_confidence():
@@ -152,26 +187,21 @@
     contain = 0.1
     scaled = 100
     nkmers = 10000
     ksize=31
     confidence=0.99
-    dist,low,high = containment_to_distance(contain,ksize,scaled,confidence=confidence,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"{dist},{low},{high}")
+    res = containment_to_distance(contain,ksize,scaled,confidence=confidence,n_unique_kmers=nkmers, estimate_ci=True)
+    print(res)
     # check results
-    exp_dist, exp_low,exp_high = 0.07158545548052564,0.04802880300938562,0.09619930040790341
-    assert dist == exp_dist
-    assert low == exp_low
-    assert high == exp_high
-
+    assert res.dist == 0.07158545548052564
+    assert res.dist_low == 0.04802880300938562
+    assert res.dist_high == 0.09619930040790341
+    assert res.p_exceeds_threshold == False
     confidence=0.90
-    dist,low,high = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,confidence=confidence)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"{dist},{low},{high}")
+    res2 = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,confidence=confidence, estimate_ci=True)
+    print(res2)
     # check results
-    exp_dist, exp_low,exp_high = 0.07158545548052564,0.05599435479247415,0.08758718871990222
-    assert dist == exp_dist
-    assert low == exp_low
-    assert high == exp_high
+    assert res2.dist == res.dist
+    assert res2.dist_low == 0.05599435479247415
+    assert res2.dist_high == 0.08758718871990222
+    assert res.p_exceeds_threshold == False
 
 
 def test_nkmers_to_bp_containment():
@@ -179,17 +209,18 @@
     scaled = 100
     bp_len = 10030
     ksize=31
-    nkmers = sequence_len_to_n_kmers(bp_len,ksize)
-    print("nkmers_from_bp:", bp_len)
+    nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp=bp_len)
+    print("nkmers_from_bp:", nkmers)
     confidence=0.99
-    kmer_dist = containment_to_distance(containment,ksize,scaled,confidence=confidence,n_unique_kmers=nkmers)
-    bp_dist = containment_to_distance(containment,ksize,scaled,confidence=confidence,sequence_len_bp=bp_len)
-    print(f"\nkDIST:", kmer_dist)
-    print(f"\nbpDIST:",bp_dist)
+    kmer_res = containment_to_distance(containment,ksize,scaled,confidence=confidence,n_unique_kmers=nkmers,estimate_ci=True)
+    bp_res = containment_to_distance(containment,ksize,scaled,confidence=confidence,sequence_len_bp=bp_len,estimate_ci=True)
+    print(f"\nkDIST: {kmer_res}")
+    print(f"\nbpDIST: {bp_res}")
     # check results
-    assert kmer_dist == (0.07158545548052564, 0.04802880300938562, 0.09619930040790341)
-    assert bp_dist == (0.07158545548052564, 0.04802880300938562, 0.09619930040790341)
-    assert kmer_dist==bp_dist
+    assert kmer_res==bp_res
+    assert kmer_res.dist == 0.07158545548052564
+    assert kmer_res.dist_low == 0.04802880300938562
+    assert kmer_res.dist_high == 0.09619930040790341
 
 
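# --- Conversion sketch (illustrative, not part of the patch) ---
# The bp -> k-mer conversion exercised above (and in test_handle_seqlen_nkmers
# further down) is the standard nkmers = sequence_len_bp - (ksize - 1):
# a 10030 bp sequence has 10030 - 30 = 10000 k-mers at k=31, which is why
# n_unique_kmers=10000 and sequence_len_bp=10030 give identical results.
bp_len, ksize = 10030, 31
nkmers = bp_len - (ksize - 1)
assert nkmers == 10000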
 def test_jaccard_to_distance_zero():
@@ -197,22 +228,13 @@ def test_jaccard_to_distance_zero():
     scaled = 1
     nkmers = 10000
     ksize=21
-    dist,low,high = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"{dist},{low},{high}")
+    res = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
+    print(res)
     # check results
-    exp_dist, exp_low,exp_high = 1.0,1.0,1.0
-    assert dist == exp_dist
-    assert low == exp_low
-    assert high == exp_high
-    # return identity instead
-    exp_id, exp_idlow,exp_idhigh = 0.0,0.0,0.0
-    ident,low,high = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers,return_identity=True)
-    print(f"{ident},{low},{high}")
-    assert ident == exp_id
-    assert low == exp_idlow
-    assert high == exp_idhigh
+    assert res.dist == 1.0
+    assert res.ani == 0.0
+    assert res.p_nothing_in_common == 1.0
+    assert res.jaccard_error == 0.0
 
 
 def test_jaccard_to_distance_one():
@@ -220,52 +242,36 @@
     scaled = 1
     nkmers = 10000
     ksize=21
-    dist,low,high = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"{dist},{low},{high}")
-    # check results
-    exp_dist, exp_low,exp_high = 0.0,0.0,0.0
-    assert dist == exp_dist
-    assert low == exp_low
-    assert high == exp_high
-    # return identity instead
-    exp_id, exp_idlow,exp_idhigh = 1.0,1.0,1.0
-    ident,low,high = jaccard_to_distance(jaccard,ksize,scaled,return_identity=True,n_unique_kmers=nkmers)
-    print(f"{ident},{low},{high}")
-    assert ident == exp_id
-    assert low == exp_idlow
-    assert high == exp_idhigh
-
-
-def test_jaccard_to_distance_scaled1():
-    jaccard = 0.5
-    scaled = 1
-    nkmers = 10000
-    ksize=21
-    dist,low,high = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"({dist},{low},{high})")
+    res = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
+    print(res)
     # check results
-    assert (dist,low,high) == (0.019122659390482077,0.017549816764593173,0.020857520093330525)
-    # return identity instead
-    ident,low,high = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers,return_identity=True)
-    print(f"({ident},{low},{high})")
-    assert (ident,low, high) == (0.9808773406095179,0.9791424799066695,0.9824501832354068)
+    assert res.dist == 0.0
+    assert res.ani == 1.0
+    assert res.p_nothing_in_common == 0.0
+    assert res.jaccard_error == 0.0
 
 
-def test_jaccard_to_distance_scaled100():
+def test_jaccard_to_distance_scaled():
+    # scaled value doesn't impact point estimate or jaccard error, just p_nothing_in_common
     jaccard = 0.5
-    scaled = 100
+    scaled = 1
     nkmers = 10000
     ksize=21
-    dist,low,high = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"{dist},{low},{high}")
+    res = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
+    print(res)
     # check results
-    (dist,low,high) = (0.019122659390482077,0.014156518319161603,0.02509547110001303)
+    assert res.dist == 0.019122659390482077
+    assert res.ani == 0.9808773406095179
+    assert res.p_exceeds_threshold == False
+    assert res.jaccard_error == 0.00018351337045518042
+    assert res.je_exceeds_threshold == True
+    scaled = 100
+    res2 = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
+    print(res2)
+    assert res2.dist == res.dist
+    assert res2.jaccard_error == res.jaccard_error
+    assert res2.p_nothing_in_common != res.p_nothing_in_common
+    assert res2.p_exceeds_threshold == False
 
 
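# --- Numeric sketch (illustrative, not part of the patch) ---
# The jaccard point estimate in these tests is consistent with first
# converting jaccard to the containment-like quantity 2j/(1+j) and then
# taking the k-th root: ani = (2j/(1+j)) ** (1/ksize). Worked example
# matching test_jaccard_to_distance_scaled above (jaccard=0.5, ksize=21):
jaccard, ksize = 0.5, 21
ani = (2 * jaccard / (1 + jaccard)) ** (1 / ksize)
print(ani)      # ~0.9808773406095179
print(1 - ani)  # ~0.019122659390482077, the expected dist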
 def test_jaccard_to_distance_k31():
@@ -273,12 +279,15 @@ def test_jaccard_to_distance_k31():
     scaled = 100
     nkmers = 10000
     ksize=31
-    dist,low,high = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"({dist},{low},{high})")
+    res = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
+    print(res)
     # check results
-    assert (dist, low, high) == (0.012994354410710174,0.009559840649526755,0.017172145712325677)
+    assert res.ani == 0.9870056455892898
+    assert res.p_exceeds_threshold == False
+    assert res.je_exceeds_threshold == True
+    res2 = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers, err_threshold=0.1)
+    assert res2.ani == res.ani
+    assert res2.je_exceeds_threshold == False
 
 
 def test_jaccard_to_distance_k31_2():
@@ -286,85 +295,101 @@
     scaled = 100
     nkmers = 10000
     ksize=31
-    dist,low,high = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"({dist},{low},{high})")
+    res = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
+    print(res)
     # check results
-    assert (dist, low, high) == (0.0535071608231702,0.04073963222782166,0.0667374639111566)
+    assert res.ani == 0.9464928391768298
+    assert res.p_exceeds_threshold == False
+    assert res.je_exceeds_threshold == False
 
 
-def test_jaccard_to_distance_confidence():
+def test_nkmers_to_bp_jaccard():
     jaccard = 0.1
     scaled = 100
-    nkmers = 10000
+    bp_len = 10030
     ksize=31
-    confidence=0.99
-    dist,low,high = jaccard_to_distance(jaccard,ksize,scaled,confidence=confidence,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"({dist},{low},{high})")
+    nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp=bp_len)
+    print("nkmers_from_bp:", nkmers)
+    kmer_res = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers)
+    bp_res = jaccard_to_distance(jaccard,ksize,scaled,sequence_len_bp=bp_len)
+    print(f"\nkmer_res: {kmer_res}")
+    print(f"\nbp_res: {bp_res}")
     # check results
-    assert (dist,low,high) == (0.0535071608231702,0.03702518586582811,0.07080999238232542)
+    assert kmer_res == bp_res
+    assert kmer_res.dist == 0.0535071608231702
+    assert kmer_res.p_exceeds_threshold == False
+    assert kmer_res.je_exceeds_threshold == False
 
-    confidence=0.90
-    dist,low,high = jaccard_to_distance(jaccard,ksize,scaled,confidence=confidence,n_unique_kmers=nkmers)
-    print("\nDIST:", dist)
-    print("CI:", low, " - ", high)
-    print(f"({dist},{low},{high})")
-    # check results
-    assert(dist,low,high) == (0.0535071608231702,0.04270836172610141,0.06462806500239282)
 
+def test_exp_prob_nothing_common():
+    dist = 0.25
+    ksize = 31
+    scaled = 10
+    bp_len = 1000030
+    nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp=bp_len)
+    print("nkmers_from_bp:", nkmers)
 
-def test_nkmers_to_bp():
-    jaccard = containment = 0.1
-    scaled = 100
-    bp_len = 10030
-    ksize=31
-    nkmers = sequence_len_to_n_kmers(bp_len,ksize)
-    print("nkmers_from_bp:", bp_len)
-    confidence=0.99
-    kmer_dist = jaccard_to_distance(jaccard,ksize,scaled,confidence=confidence,n_unique_kmers=nkmers)
-    bp_dist = jaccard_to_distance(jaccard,ksize,scaled,confidence=confidence,sequence_len_bp=bp_len)
-    print(f"\nk_jDIST:", kmer_dist)
-    print(f"\nbp_jDIST:",bp_dist)
-    # check results
-    assert kmer_dist == (0.0535071608231702, 0.03702518586582811, 0.07080999238232542)
-    assert bp_dist == (0.0535071608231702, 0.03702518586582811, 0.07080999238232542)
-    assert kmer_dist==bp_dist
-
-    kmer_cdist = containment_to_distance(containment,ksize,scaled,confidence=confidence,n_unique_kmers=nkmers)
-    bp_cdist = containment_to_distance(containment,ksize,scaled,confidence=confidence,sequence_len_bp=bp_len)
-    print(f"\nk_cDIST:", kmer_cdist)
-    print(f"\nbp_cDIST:",bp_cdist)
-    # check results
-    assert kmer_cdist == (0.07158545548052564, 0.04802880300938562, 0.09619930040790341)
-    assert bp_cdist == (0.07158545548052564, 0.04802880300938562, 0.09619930040790341)
-    assert kmer_cdist==bp_cdist
+    nkmers_pnc = get_exp_probability_nothing_common(dist,ksize,scaled,n_unique_kmers=nkmers)
+    print(f"prob nothing in common: {nkmers_pnc}")
+    bp_pnc = get_exp_probability_nothing_common(dist,ksize,scaled,sequence_len_bp=bp_len)
+    assert nkmers_pnc == bp_pnc == 7.437016945722123e-07
 
 
-def test_jaccard_to_distance_point_estimate():
-    jaccard = 0.9
-    ksize = 21
-    scaled = 10
-    n_unique_kmers = 100000
-    # jaccard_to_distance_point_estimate usage
-    print(n_unique_kmers)
-    mut_rate, err = jaccard_to_distance_point_estimate(jaccard, ksize, scaled, n_unique_kmers)
-    print('Point estimate is: ' + str(mut_rate))
-    print('Error is: ' + str(err))
-#    if err > 10.0**(-4.0):
-#        print('Cannot trust this point estimate!')
-
-#    # get_exp_probability_nothing_common usage
-#    ksize = 21
-#    scaled = 1000
-#    n_unique_kmers = 10000000
-#    mutation_rate = 0.3
-#    threshold = 10.0**(-3)
-
-#    exp_probability_no_common = get_exp_probability_nothing_common(n_unique_kmers,
-#                                                                   ksize, mutation_rate, scaled)
-#    print(exp_probability_no_common)
-#    if (exp_probability_no_common >= threshold):
-#        print('There could be cases where nothing common between sketches may happen!')
\ No newline at end of file
+def test_containment_to_distance_tinytestdata_var0():
+    """
+    tiny test data to trigger the following:
+    WARNING: Cannot estimate ANI confidence intervals from containment. Do your sketches contain enough hashes?
+    Error: varN <0.0!
+    """
+    contain = 0.9
+    scaled = 1
+    nkmers = 4
+    ksize=31
+    res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers, estimate_ci=True)
+    print(res)
+    # check results
+    assert res.dist == 0.003392957179023992
+    assert res.dist_low == None
+    assert res.dist_high == None
+    assert res.ani_low == None
+    assert res.ani_high == None
+    assert res.p_exceeds_threshold == False
+
+
+def test_var_n_mutated():
+    # check 0
+    r = 0
+    ksize = 31
+    nkmers = 200
+    var_n_mut = var_n_mutated(nkmers,ksize,r)
+    print(f"var_n_mutated: {var_n_mut}")
+    assert var_n_mut == 0
+    # check var 0.0 ValueError
+    r = 10
+    ksize = 31
+    nkmers = 200
+    with pytest.raises(ValueError) as exc:
+        var_n_mut = var_n_mutated(nkmers,ksize,r)
+    assert "Error: varN <0.0!" in str(exc)
+    # check successful
+    r = 0.4
+    ksize = 31
+    nkmers = 200000
+    var_n_mut = var_n_mutated(nkmers,ksize,r)
+    print(f"var_n_mutated: {var_n_mut}")
+    assert var_n_mut == 0.10611425440741508
+
+
+def test_handle_seqlen_nkmers():
+    bp_len = 10030
+    ksize=31
+    # convert seqlen to nkmers
+    nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp=bp_len)
+    assert nkmers == 10000
+    # if nkmers is provided, just use that
+    nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp=bp_len, n_unique_kmers=bp_len)
+    assert nkmers == 10030
+    # if neither seqlen nor nkmers is provided, complain
+    with pytest.raises(ValueError) as exc:
+        nkmers = handle_seqlen_nkmers(ksize)
+    assert("Error: distance estimation requires input of either 'sequence_len_bp' or 'n_unique_kmers'") in str(exc)
diff --git a/tests/test_index.py b/tests/test_index.py
index 35e31e0714..c0ee430ea2 100644
--- a/tests/test_index.py
+++ b/tests/test_index.py
@@ -941,8 +941,7 @@ def test_zipfile_API_signatures_traverse_yield_all(use_manifest):
     assert len(zipidx) == 8
 
     # confirm that there are 12 files in there total, incl build.sh and dirs
-    zf = zipidx.storage.zipfile
-    allfiles = [ zi.filename for zi in zf.infolist() ]
+    allfiles = zipidx.storage._filenames()
     print(allfiles)
     assert len(allfiles) == 13
diff --git a/tests/test_lca.py b/tests/test_lca.py
index 03a4ff6650..990de48864 100644
--- a/tests/test_lca.py
+++ b/tests/test_lca.py
@@ -2670,3 +2670,47 @@ def test_lca_index_with_picklist_exclude(runtmp):
     assert len(siglist) == 9
     for ss in siglist:
         assert 'Thermotoga' not in ss.name
+
+
+def test_lca_jaccard_ordering():
+    # this tests a tricky situation where for three sketches A, B, C,
+    # |A intersect B| is greater than |A intersect C|
+    # _but_
+    # |A jaccard B| is less than |A jaccard C|
+    a = sourmash.MinHash(ksize=31, n=0, scaled=2)
+    b = a.copy_and_clear()
+    c = a.copy_and_clear()
+
+    a.add_many([1, 2, 3, 4])
+    b.add_many([1, 2, 3] + list(range(10, 30)))
+    c.add_many([1, 5])
+
+    def _intersect(x, y):
+        return x.intersection_and_union_size(y)[0]
+
+    print('a intersect b:', _intersect(a, b))
+    print('a intersect c:', _intersect(a, c))
+    print('a jaccard b:', a.jaccard(b))
+    print('a jaccard c:', a.jaccard(c))
+    assert _intersect(a, b) > _intersect(a, c)
+    assert a.jaccard(b) < a.jaccard(c)
+
+    # thresholds to use:
+    assert a.jaccard(b) < 0.15
+    assert a.jaccard(c) > 0.15
+
+    # now - make signatures, try out :)
+    ss_a = sourmash.SourmashSignature(a, name='A')
+    ss_b = sourmash.SourmashSignature(b, name='B')
+    ss_c = sourmash.SourmashSignature(c, name='C')
+
+    db = sourmash.lca.LCA_Database(ksize=31, scaled=2)
+    db.insert(ss_a)
+    db.insert(ss_b)
+    db.insert(ss_c)
+
+    sr = db.search(ss_a, threshold=0.15)
+    print(sr)
+    assert len(sr) == 2
+    assert sr[0].signature == ss_a
+    assert sr[1].signature == ss_c
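# --- Arithmetic sketch (illustrative, not part of the patch) ---
# Why the ordering in test_lca_jaccard_ordering flips: intersections compare
# 3 vs 1 (B wins), but jaccard divides by the union, and B's large union
# drags its score below the 0.15 threshold while C stays above it.
a = {1, 2, 3, 4}
b = {1, 2, 3} | set(range(10, 30))
c = {1, 5}
print(len(a & b), len(a & c))      # 3 > 1
print(len(a & b) / len(a | b))     # 3/24 ~ 0.125 < 0.15
print(len(a & c) / len(a | c))     # 1/5  = 0.2   > 0.15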
diff --git a/tests/test_minhash.py b/tests/test_minhash.py
index 2bac12751a..4d9124c501 100644
--- a/tests/test_minhash.py
+++ b/tests/test_minhash.py
@@ -2655,62 +2655,39 @@ def test_containment(track_abundance):
 
 def test_containment_ANI():
     f1 = utils.get_test_data('2.fa.sig')
     f2 = utils.get_test_data('2+63.fa.sig')
-    f3 = utils.get_test_data('47+63.fa.sig')
     mh1 = sourmash.load_one_signature(f1, ksize=31).minhash
     mh2 = sourmash.load_one_signature(f2, ksize=31).minhash
-    mh3 = sourmash.load_one_signature(f3, ksize=31).minhash
-
-    print("\nmh1 contained by mh2", mh1.containment_ani(mh2))
-    print("mh2 contained by mh1",mh2.containment_ani(mh1))
-    print("mh1 max containment", mh1.max_containment_ani(mh2))
-    print("mh2 max containment", mh2.max_containment_ani(mh1))
-
-    print("\nmh2 contained by mh3", mh2.containment_ani(mh3))
-    print("mh3 contained by mh2",mh3.containment_ani(mh2))
-
-    print("\nmh2 contained by mh3, CI 90%", mh2.containment_ani(mh3, confidence=0.9))
-    print("mh3 contained by mh2, CI 99%",mh3.containment_ani(mh2, confidence=0.99))
-    assert mh1.containment_ani(mh2) == (1.0, 1.0, 1.0)
-    assert mh2.containment_ani(mh1) == (0.9658183324254062, 0.9648452889933389, 0.966777042966207)
-    assert mh1.max_containment_ani(mh2) == (1.0, 1.0, 1.0)
-    assert mh2.max_containment_ani(mh1) == (1.0, 1.0, 1.0)
+    m1_cont_m2 = mh1.containment_ani(mh2, estimate_ci=True)
+    m2_cont_m1 = mh2.containment_ani(mh1, estimate_ci=True)
+    print("\nmh1 contained by mh2", m1_cont_m2)
+    print("mh2 contained by mh1", m2_cont_m1)
 
-    # containment 1 is special case. check max containment for non 0/1 values:
-    assert mh2.containment_ani(mh3) == (0.9866751346467802, 0.9861576758035308, 0.9871770716451368)
-    assert mh3.containment_ani(mh2) == (0.9868883523107224, 0.986374049720872, 0.9873870188726516)
-    assert mh2.max_containment_ani(mh3) == (0.9868883523107224, 0.986374049720872, 0.9873870188726516)
-    assert mh3.max_containment_ani(mh2) == (0.9868883523107224, 0.986374049720872, 0.9873870188726516)
-    assert mh2.max_containment_ani(mh3)[0] == max(mh2.containment_ani(mh3)[0], mh3.containment_ani(mh2)[0])
-
-    # check confidence
-    assert mh2.containment_ani(mh3, confidence=0.9) == (0.9866751346467802, 0.986241913154108, 0.9870974232542815)
-    assert mh3.containment_ani(mh2, confidence=0.99) == (0.9868883523107224, 0.9862092287269876, 0.987540474733798)
+    assert (m1_cont_m2.ani, m1_cont_m2.ani_low, m1_cont_m2.ani_high, m1_cont_m2.p_nothing_in_common) == (1.0, None, None, 0.0)
+    assert (round(m2_cont_m1.ani,3), round(m2_cont_m1.ani_low,3), round(m2_cont_m1.ani_high,3)) == (0.966, 0.965, 0.967)
+    m1_mc_m2 = mh1.max_containment_ani(mh2, estimate_ci=True)
+    m2_mc_m1 = mh2.max_containment_ani(mh1, estimate_ci=True)
+    print("mh1 max containment", m1_mc_m2)
+    print("mh2 max containment", m2_mc_m1)
+    assert m1_mc_m2 == m2_mc_m1
+    assert (m1_mc_m2.ani, m1_mc_m2.ani_low, m1_mc_m2.ani_high) == (1.0,None,None)
+
 
 def test_containment_ANI_precalc_containment():
     f1 = utils.get_test_data('2.fa.sig')
     f2 = utils.get_test_data('2+63.fa.sig')
-    f3 = utils.get_test_data('47+63.fa.sig')
     mh1 = sourmash.load_one_signature(f1, ksize=31).minhash
     mh2 = sourmash.load_one_signature(f2, ksize=31).minhash
-    mh3 = sourmash.load_one_signature(f3, ksize=31).minhash
 
     # precalc containments and assert same results
     s1c = mh1.contained_by(mh2)
     s2c = mh2.contained_by(mh1)
-    s3c = mh2.contained_by(mh3)
-    s4c = mh3.contained_by(mh2)
     mc = max(s1c, s2c)
-    assert mh1.containment_ani(mh2, containment=s1c) == (1.0, 1.0, 1.0)
-    assert mh2.containment_ani(mh1, containment=s2c) == (0.9658183324254062, 0.9648452889933389, 0.966777042966207)
-    assert mh1.max_containment_ani(mh2, max_containment=mc) == (1.0, 1.0, 1.0)
-    assert mh2.max_containment_ani(mh1, max_containment=mc) == (1.0, 1.0, 1.0)
-    assert mh2.containment_ani(mh3, containment=s3c) == (0.9866751346467802, 0.9861576758035308, 0.9871770716451368)
-    assert mh3.containment_ani(mh2, containment=s4c) == (0.9868883523107224, 0.986374049720872, 0.9873870188726516)
-    assert mh3.containment_ani(mh2, containment=s4c, confidence=0.99) == (0.9868883523107224, 0.9862092287269876, 0.987540474733798)
-    assert mh2.max_containment_ani(mh3, max_containment=s4c) == (0.9868883523107224, 0.986374049720872, 0.9873870188726516)
-    assert mh3.max_containment_ani(mh2, max_containment=s4c) == (0.9868883523107224, 0.986374049720872, 0.9873870188726516)
+    assert mh1.containment_ani(mh2, estimate_ci=True) == mh1.containment_ani(mh2, containment=s1c, estimate_ci=True)
+    assert mh2.containment_ani(mh1) == mh2.containment_ani(mh1, containment=s2c)
+    assert mh1.max_containment_ani(mh2) == mh1.max_containment_ani(mh2, max_containment=mc)
+    assert mh1.max_containment_ani(mh2) == mh2.max_containment_ani(mh1, max_containment=mc)
 
 
 def test_containment_ANI_downsample():
@@ -2752,12 +2729,12 @@ def test_jaccard_ANI():
     mh2 = sourmash.load_one_signature(f2).minhash
 
     print("\nJACCARD_ANI", mh1.jaccard_ani(mh2))
-    print("\nJACCARD_ANI 90% CI", mh1.jaccard_ani(mh2, confidence=0.9))
-    print("\nJACCARD_ANI 99% CI", mh1.jaccard_ani(mh2, confidence=0.99))
-    assert mh1.jaccard_ani(mh2) == (0.9783711630110239, 0.9776381521132318, 0.9790929734698974)
-    assert mh1.jaccard_ani(mh2, confidence=0.9) == (0.9783711630110239, 0.9777567290812516, 0.9789777082973189)
-    assert mh1.jaccard_ani(mh2, confidence=0.99) == (0.9783711630110239, 0.9774056164150094, 0.9793173653983231)
+    m1_jani_m2 = mh1.jaccard_ani(mh2)
+    m2_jani_m1 = mh2.jaccard_ani(mh1)
+
+    assert m1_jani_m2 == m2_jani_m1
+    assert (m1_jani_m2.ani, m1_jani_m2.p_nothing_in_common, m1_jani_m2.jaccard_error) == (0.9783711630110239, 0.0, 3.891666770716877e-07)
 
 
 def test_jaccard_ANI_precalc_jaccard():
@@ -2769,8 +2746,9 @@
     jaccard = mh1.jaccard(mh2)
 
     print("\nJACCARD_ANI", mh1.jaccard_ani(mh2,jaccard=jaccard))
-    assert mh1.jaccard_ani(mh2, jaccard=jaccard) == (0.9783711630110239, 0.9776381521132318, 0.9790929734698974)
-    assert mh1.jaccard_ani(mh2, jaccard=jaccard, confidence=0.9) == (0.9783711630110239, 0.9777567290812516, 0.9789777082973189)
+    assert mh1.jaccard_ani(mh2) == mh1.jaccard_ani(mh2, jaccard=jaccard) == mh2.jaccard_ani(mh1, jaccard=jaccard)
+    wrong_jaccard = jaccard - 0.1
+    assert mh1.jaccard_ani(mh2) != mh1.jaccard_ani(mh2, jaccard=wrong_jaccard)
 
 
 def test_jaccard_ANI_downsample():
@@ -2793,3 +2771,21 @@
     assert mh1.scaled == mh2.scaled
     ds_j_manual = mh1.jaccard_ani(mh2)
     assert ds_s1c == ds_s2c == ds_j_manual
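# --- Guard sketch (illustrative, not part of the patch) ---
# The test added below exercises the degenerate case where sketches hold so
# few hashes that the variance estimate for the number of mutated k-mers
# goes negative ("Error: varN <0.0!"); the CI bounds then come back as None
# while the point estimate is still set, so callers should guard on them.
from sourmash.minhash import MinHash

tiny1 = MinHash(0, 21, scaled=1)
tiny2 = MinHash(0, 21, scaled=1)
tiny1.add_many((1, 3, 4))
tiny2.add_many((1, 2, 3, 4))

res = tiny2.containment_ani(tiny1, estimate_ci=True)
if res.ani_low is None or res.ani_high is None:
    print("CI unavailable for tiny sketches; point estimate:", res.ani)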
+ """ + mh1 = MinHash(0, 21, scaled=1, track_abundance=False) + mh2 = MinHash(0, 21, scaled=1, track_abundance=False) + + mh1.add_many((1, 3, 4)) + mh2.add_many((1, 2, 3, 4)) + + m2_cani_m1 = mh2.containment_ani(mh1, estimate_ci=True) + print(m2_cani_m1) + assert m2_cani_m1.ani == 0.986394259982259 + assert m2_cani_m1.ani_low == None + assert m2_cani_m1.ani_high == None diff --git a/tests/test_sbt.py b/tests/test_sbt.py index cb5b043c91..f31aa8c77c 100644 --- a/tests/test_sbt.py +++ b/tests/test_sbt.py @@ -367,7 +367,7 @@ def test_sbt_zipstorage(tmpdir): old_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} print(*old_result, sep='\n') - with ZipStorage(str(tmpdir.join("tree.sbt.zip"))) as storage: + with ZipStorage(str(tmpdir.join("tree.sbt.zip")), mode="w") as storage: tree.save(str(tmpdir.join("tree.sbt.json")), storage=storage) with ZipStorage(str(tmpdir.join("tree.sbt.zip"))) as storage: @@ -921,7 +921,7 @@ def test_gather_single_return(c): sig47 = load_one_signature(sig47file, ksize=31) sig63 = load_one_signature(sig63file, ksize=31) - # construct LCA Database + # construct SBT Database factory = GraphFactory(31, 1e5, 4) tree = SBT(factory, d=2) @@ -937,6 +937,51 @@ def test_gather_single_return(c): assert results[0][0] == 1.0 +def test_sbt_jaccard_ordering(runtmp): + # this tests a tricky situation where for three sketches A, B, C, + # |A intersect B| is greater than |A intersect C| + # _but_ + # |A jaccard B| is less than |A intersect B| + a = sourmash.MinHash(ksize=31, n=0, scaled=2) + b = a.copy_and_clear() + c = a.copy_and_clear() + + a.add_many([1, 2, 3, 4]) + b.add_many([1, 2, 3] + list(range(10, 30))) + c.add_many([1, 5]) + + def _intersect(x, y): + return x.intersection_and_union_size(y)[0] + + print('a intersect b:', _intersect(a, b)) + print('a intersect c:', _intersect(a, c)) + print('a jaccard b:', a.jaccard(b)) + print('a jaccard c:', a.jaccard(c)) + assert _intersect(a, b) > _intersect(a, c) + assert a.jaccard(b) < a.jaccard(c) + + # thresholds to use: + assert a.jaccard(b) < 0.15 + assert a.jaccard(c) > 0.15 + + # now - make signatures, try out :) + ss_a = sourmash.SourmashSignature(a, name='A') + ss_b = sourmash.SourmashSignature(b, name='B') + ss_c = sourmash.SourmashSignature(c, name='C') + + factory = GraphFactory(31, 1e5, 4) + db = SBT(factory, d=2) + db.insert(ss_a) + db.insert(ss_b) + db.insert(ss_c) + + sr = db.search(ss_a, threshold=0.15) + print(sr) + assert len(sr) == 2 + assert sr[0].signature == ss_a + assert sr[1].signature == ss_c + + def test_sbt_protein_command_index(runtmp): c = runtmp diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py index 38ba835cb4..955efc7301 100644 --- a/tests/test_sourmash.py +++ b/tests/test_sourmash.py @@ -982,6 +982,17 @@ def test_search_ignore_abundance(runtmp): assert out1 != out2 +def test_search_abund_subj_flat(runtmp): + # test Index.search_abund requires an abund subj + sig47 = utils.get_test_data('track_abund/47.fa.sig') + sig63 = utils.get_test_data('63.fa.sig') + + with pytest.raises(SourmashCommandFailed) as exc: + runtmp.sourmash('search', sig47, sig63) + + assert "'search_abund' requires subject signatures with abundance information" in str(exc.value) + + def test_search_abund_csv(runtmp): # test search with abundance signatures, look at CSV output testdata1 = utils.get_test_data('short.fa') diff --git a/tests/test_sourmash_sketch.py b/tests/test_sourmash_sketch.py index 9f7912ade9..ba4aeb949a 100644 --- a/tests/test_sourmash_sketch.py +++ b/tests/test_sourmash_sketch.py @@ 
diff --git a/tests/test_sourmash_sketch.py b/tests/test_sourmash_sketch.py
index 9f7912ade9..ba4aeb949a 100644
--- a/tests/test_sourmash_sketch.py
+++ b/tests/test_sourmash_sketch.py
@@ -511,7 +511,8 @@ def test_manifest_row_to_compute_parameters_4():
 
 
 def test_bad_compute_parameters():
-    p = ComputeParameters([31], 42, 0, 0, 0, 0, 0, True, 1000)
+    p = ComputeParameters(ksizes=[31], seed=42, dna=0, protein=0, dayhoff=0,
+                          hp=0, num_hashes=0, track_abundance=True, scaled=1000)
     with pytest.raises(AssertionError):
         p.moltype
diff --git a/tox.ini b/tox.ini
index ec4576ea10..af5250fbee 100644
--- a/tox.ini
+++ b/tox.ini
@@ -95,7 +95,7 @@ deps =
 changedir = {toxinidir}
 commands =
     asv machine --yes
-    asv continuous latest HEAD
+    asv continuous latest HEAD {posargs}
 
 [testenv:docs]
 description = invoke sphinx-build to build the HTML docs
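# --- Usage sketch (illustrative, not part of the patch) ---
# The test change above moves ComputeParameters to keyword arguments, making
# the nine positional fields self-documenting at call sites; the test itself
# passes all-zero moltypes on purpose, to assert failure. The import path
# below is an assumption. The tox change similarly forwards extra command
# line arguments ({posargs}) to `asv continuous`.
from sourmash.command_compute import ComputeParameters  # assumed import path

p = ComputeParameters(ksizes=[31], seed=42, dna=1, protein=0, dayhoff=0,
                      hp=0, num_hashes=0, track_abundance=True, scaled=1000)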