From fee62922d8857ce93f1d4e90fd7240629d606997 Mon Sep 17 00:00:00 2001
From: Luiz Irber
Date: Mon, 5 Feb 2024 08:26:15 -0800
Subject: [PATCH] Pre-commit updates (#2427)

Ref #2421
Fix #2908

Updates older pre-commit hooks (initial experiment: https://github.com/sourmash-bio/sourmash/pull/680), mostly based (again) on the [tox configs](https://github.com/tox-dev/tox/blob/main/.pre-commit-config.yaml).

Uses ruff in place of pyupgrade/isort/black/flake8.

This PR has a couple of commits: the first ones update the configs, and the last one runs `tox -e fix_lint` to apply pre-commit. The aim is to iterate on the first (config) commits without breaking the tests run by the last (formatting) commit.

---
 .pre-commit-config.yaml | 95 +- benchmarks/benchmarks.py | 65 +- doc/conf.py | 191 +- pyproject.toml | 29 +- src/sourmash/__init__.py | 55 +- src/sourmash/__main__.py | 9 +- src/sourmash/cli/__init__.py | 75 +- src/sourmash/cli/categorize.py | 28 +- src/sourmash/cli/compare.py | 93 +- src/sourmash/cli/compute.py | 107 +- src/sourmash/cli/gather.py | 131 +- src/sourmash/cli/import_csv.py | 12 +- src/sourmash/cli/index.py | 64 +- src/sourmash/cli/info.py | 23 +- src/sourmash/cli/lca/__init__.py | 19 +- src/sourmash/cli/lca/classify.py | 51 +- src/sourmash/cli/lca/compare_csv.py | 34 +- src/sourmash/cli/lca/index.py | 80 +- src/sourmash/cli/lca/rankinfo.py | 20 +- src/sourmash/cli/lca/summarize.py | 59 +- src/sourmash/cli/migrate.py | 6 +- src/sourmash/cli/multigather.py | 58 +- src/sourmash/cli/plot.py | 76 +- src/sourmash/cli/prefetch.py | 76 +- src/sourmash/cli/sbt_combine.py | 15 +- src/sourmash/cli/scripts/__init__.py | 24 +- src/sourmash/cli/search.py | 99 +- src/sourmash/cli/sig/__init__.py | 22 +- src/sourmash/cli/sig/cat.py | 41 +- src/sourmash/cli/sig/check.py | 57 +- src/sourmash/cli/sig/collect.py | 55 +- src/sourmash/cli/sig/describe.py | 31 +- src/sourmash/cli/sig/downsample.py | 36 +- src/sourmash/cli/sig/export.py | 19 +- src/sourmash/cli/sig/extract.py | 43 +- src/sourmash/cli/sig/fileinfo.py | 24 +- src/sourmash/cli/sig/filter.py | 44 +- src/sourmash/cli/sig/flatten.py | 38 +- src/sourmash/cli/sig/grep.py | 65 +- src/sourmash/cli/sig/inflate.py | 23 +- src/sourmash/cli/sig/ingest.py | 22 +- src/sourmash/cli/sig/intersect.py | 33 +- src/sourmash/cli/sig/kmers.py | 50 +- src/sourmash/cli/sig/manifest.py | 36 +- src/sourmash/cli/sig/merge.py | 35 +- src/sourmash/cli/sig/overlap.py | 12 +- src/sourmash/cli/sig/rename.py | 38 +- src/sourmash/cli/sig/split.py | 32 +- src/sourmash/cli/sig/subtract.py | 32 +- src/sourmash/cli/sketch/__init__.py | 19 +- src/sourmash/cli/sketch/dna.py | 79 +- src/sourmash/cli/sketch/fromfile.py | 63 +- src/sourmash/cli/sketch/protein.py | 83 +- src/sourmash/cli/sketch/translate.py | 87 +- src/sourmash/cli/storage/__init__.py | 19 +- src/sourmash/cli/storage/convert.py | 13 +- src/sourmash/cli/tax/__init__.py | 22 +- src/sourmash/cli/tax/annotate.py | 71 +- src/sourmash/cli/tax/genome.py | 105 +- src/sourmash/cli/tax/grep.py | 76 +- src/sourmash/cli/tax/metagenome.py | 113 +- src/sourmash/cli/tax/prepare.py | 55 +- src/sourmash/cli/tax/summarize.py | 47 +- src/sourmash/cli/utils.py | 190 +- src/sourmash/cli/watch.py | 29 +- src/sourmash/command_compute.py | 257 +- src/sourmash/command_sketch.py | 309 +- src/sourmash/commands.py | 756 ++- src/sourmash/compare.py | 119 +- src/sourmash/distance_utils.py | 114 +- src/sourmash/exceptions.py | 11 +- src/sourmash/fig.py | 43 +- src/sourmash/hll.py | 2 +- src/sourmash/index/__init__.py | 245 +- src/sourmash/index/revindex.py | 154 +- src/sourmash/index/sqlite_index.py
| 416 +- src/sourmash/lca/__init__.py | 13 +- src/sourmash/lca/__main__.py | 9 +- src/sourmash/lca/command_classify.py | 55 +- src/sourmash/lca/command_compare_csv.py | 54 +- src/sourmash/lca/command_index.py | 237 +- src/sourmash/lca/command_rankinfo.py | 7 +- src/sourmash/lca/command_summarize.py | 79 +- src/sourmash/lca/lca_db.py | 220 +- src/sourmash/lca/lca_utils.py | 90 +- src/sourmash/logging.py | 54 +- src/sourmash/manifest.py | 142 +- src/sourmash/minhash.py | 442 +- src/sourmash/nodegraph.py | 46 +- src/sourmash/np_utils.py | 6 +- src/sourmash/picklist.py | 108 +- src/sourmash/plugins.py | 80 +- src/sourmash/save_load.py | 105 +- src/sourmash/sbt.py | 510 +- src/sourmash/sbt_storage.py | 76 +- src/sourmash/sbtmh.py | 24 +- src/sourmash/search.py | 416 +- src/sourmash/sig/__init__.py | 2 +- src/sourmash/sig/__main__.py | 693 ++- src/sourmash/sig/grep.py | 31 +- src/sourmash/signature.py | 124 +- src/sourmash/sketchcomparison.py | 106 +- src/sourmash/sourmash_args.py | 259 +- src/sourmash/sqlite_utils.py | 23 +- src/sourmash/tax/__main__.py | 387 +- src/sourmash/tax/tax_utils.py | 1167 ++-- src/sourmash/utils.py | 2 +- tests/conftest.py | 36 +- tests/sourmash_tst_utils.py | 84 +- tests/test__minhash_hypothesis.py | 16 +- tests/test_api.py | 35 +- tests/test_bugs.py | 9 +- tests/test_cmd_signature.py | 3280 ++++++----- tests/test_cmd_signature_collect.py | 410 +- tests/test_cmd_signature_fileinfo.py | 210 +- tests/test_cmd_signature_grep.py | 268 +- tests/test_compare.py | 150 +- tests/test_deprecated.py | 7 +- tests/test_distance_utils.py | 270 +- tests/test_hll.py | 36 +- tests/test_index.py | 746 +-- tests/test_index_protocol.py | 323 +- tests/test_jaccard.py | 44 +- tests/test_lca.py | 2520 +++++---- tests/test_lca_db_protocol.py | 56 +- tests/test_lca_functions.py | 356 +- tests/test_manifest.py | 57 +- tests/test_manifest_protocol.py | 119 +- tests/test_minhash.py | 731 ++- tests/test_nodegraph.py | 22 +- tests/test_np_utils.py | 1 - tests/test_picklist.py | 34 +- tests/test_plugin_framework.py | 185 +- tests/test_prefetch.py | 799 ++- tests/test_sbt.py | 572 +- tests/test_search.py | 413 +- tests/test_signature.py | 246 +- tests/test_sketchcomparison.py | 506 +- tests/test_sourmash.py | 6435 +++++++++++++--------- tests/test_sourmash_args.py | 298 +- tests/test_sourmash_compute.py | 874 +-- tests/test_sourmash_sketch.py | 1497 +++-- tests/test_sqlite_index.py | 380 +- tests/test_tax.py | 4702 +++++++++++----- tests/test_tax_utils.py | 3945 +++++++++---- tests/test_test_framework.py | 2 +- tox.ini | 259 +- utils/cardinality_estimate_confidence.py | 86 +- utils/check-tree.py | 10 +- utils/compute-dna-mh-another-way.py | 19 +- utils/compute-input-prot-another-way.py | 108 +- utils/compute-prot-mh-another-way.py | 108 +- 152 files changed, 26467 insertions(+), 16243 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bca7329143..50ab4e2c26 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,71 +1,26 @@ -default_language_version: - python: python3 repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 - hooks: - - id: check-ast -# - id: check-builtin-literals - - id: check-docstring-first - - id: check-merge-conflict - - id: check-yaml - - id: check-toml - - id: debug-statements -# - id: end-of-file-fixer -# exclude: 'tests/test-data' -# - id: trailing-whitespace -# exclude: 'tests/test-data' -#- repo: https://github.com/asottile/pyupgrade -# rev: v2.7.2 -# hooks: -# - id: pyupgrade -#- repo: 
https://github.com/pre-commit/mirrors-isort -# rev: v5.4.2 -# hooks: -# - id: isort -# additional_dependencies: [toml] - -# format using black -# when the full codebase is black, use it directly; -# while it isn't, let's use darker to format new/changed code -- repo: https://github.com/akaihola/darker - rev: 1.7.1 - hooks: - - id: darker -#- repo: https://github.com/psf/black -# rev: 20.8b1 -# hooks: -# - id: black -# args: -# - --safe -# language_version: python3.8 -#- repo: https://github.com/asottile/blacken-docs -# rev: v1.8.0 -# hooks: -# - id: blacken-docs -# additional_dependencies: -# - black==19.10b0 -# language_version: python3.8 - -#- repo: https://github.com/asottile/add-trailing-comma -# rev: v2.0.1 -# hooks: -# - id: add-trailing-comma -#- repo: https://github.com/pre-commit/pygrep-hooks -# rev: v1.6.0 -# hooks: -# - id: rst-backticks -#- repo: https://github.com/asottile/setup-cfg-fmt -# rev: v1.11.0 -# hooks: -# - id: setup-cfg-fmt -# args: -# - --min-py3-version -# - '3.7' -#- repo: https://gitlab.com/pycqa/flake8 -# rev: 3.8.3 -# hooks: -# - id: flake8 -# additional_dependencies: -# - flake8-bugbear == 20.1.2 -# language_version: python3.8 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-ast + - id: check-builtin-literals + - id: check-docstring-first + - id: check-merge-conflict + - id: check-yaml + - id: check-toml + - id: debug-statements + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.2.0 + hooks: + - id: ruff-format + - id: ruff + args: ["--fix", "--unsafe-fixes", "--exit-non-zero-on-fix"] + - repo: https://github.com/tox-dev/tox-ini-fmt + rev: "0.5.2" + hooks: + - id: tox-ini-fmt + args: ["-p", "fix_lint"] + - repo: meta + hooks: + - id: check-hooks-apply + - id: check-useless-excludes diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index b2b3d7180b..d517bf7b2f 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -4,30 +4,31 @@ from sourmash.sbt_storage import ZipStorage from sourmash.minhash import MinHash -RANDOM_SEQ_SIZE=3000 -RANDOM_SEQ_NUMBER=300 +RANDOM_SEQ_SIZE = 3000 +RANDOM_SEQ_NUMBER = 300 -MINHASH_NUM=500 -MINHASH_K=21 +MINHASH_NUM = 500 +MINHASH_K = 21 -GET_MINS_RANGE=500 -ADD_HASH_RANGE=10_000 -ADD_MANY_RANGE=1000 -SIMILARITY_TIMES=500 -COUNT_COMMON_TIMES=500 -MERGE_TIMES=500 -COPY_TIMES=500 -CONCAT_TIMES=500 -SET_ABUNDANCES_RANGE=500 -ZIP_STORAGE_WRITE=100_000 -ZIP_STORAGE_LOAD=20 +GET_MINS_RANGE = 500 +ADD_HASH_RANGE = 10_000 +ADD_MANY_RANGE = 1000 +SIMILARITY_TIMES = 500 +COUNT_COMMON_TIMES = 500 +MERGE_TIMES = 500 +COPY_TIMES = 500 +CONCAT_TIMES = 500 +SET_ABUNDANCES_RANGE = 500 +ZIP_STORAGE_WRITE = 100_000 +ZIP_STORAGE_LOAD = 20 def load_sequences(): sequences = [] for i in range(10): - random_seq = random.sample("A,C,G,T".split(",") * RANDOM_SEQ_SIZE, - RANDOM_SEQ_NUMBER) + random_seq = random.sample( + "A,C,G,T".split(",") * RANDOM_SEQ_SIZE, RANDOM_SEQ_NUMBER + ) sequences.append("".join(random_seq)) return sequences @@ -35,12 +36,12 @@ def load_sequences(): class TimeMinHashSuite: def setup(self): self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False) - self.protein_mh = MinHash(MINHASH_NUM, MINHASH_K, is_protein=True, - track_abundance=False) + self.protein_mh = MinHash( + MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False + ) self.sequences = load_sequences() - self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, - track_abundance=False) + self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False) for seq in 
self.sequences: self.populated_mh.add_sequence(seq) @@ -103,8 +104,9 @@ def time_concat(self): class PeakmemMinHashSuite: def setup(self): self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True) - self.protein_mh = MinHash(MINHASH_NUM, MINHASH_K, - is_protein=True, track_abundance=True) + self.protein_mh = MinHash( + MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=True + ) self.sequences = load_sequences() def peakmem_add_sequence(self): @@ -158,21 +160,25 @@ def time_set_abundances_noclear(self): for i in range(SET_ABUNDANCES_RANGE): mh.set_abundances(mins, clear=False) + class PeakmemMinAbundanceSuite(PeakmemMinHashSuite): def setup(self): PeakmemMinHashSuite.setup(self) self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True) + #################### -class TimeZipStorageSuite: +class TimeZipStorageSuite: def setup(self): import zipfile + self.zipfile = NamedTemporaryFile() - with zipfile.ZipFile(self.zipfile, mode='w', - compression=zipfile.ZIP_STORED) as storage: + with zipfile.ZipFile( + self.zipfile, mode="w", compression=zipfile.ZIP_STORED + ) as storage: for i in range(ZIP_STORAGE_WRITE): # just so we have lots of entries storage.writestr(str(i), b"0") @@ -196,17 +202,18 @@ def teardown(self): class PeakmemZipStorageSuite: def setup(self): import zipfile + self.zipfile = NamedTemporaryFile() - with zipfile.ZipFile(self.zipfile, mode='w', - compression=zipfile.ZIP_STORED) as storage: + with zipfile.ZipFile( + self.zipfile, mode="w", compression=zipfile.ZIP_STORED + ) as storage: for i in range(ZIP_STORAGE_WRITE): # just so we have lots of entries storage.writestr(str(i), b"0") # one big-ish entry storage.writestr("sig1", b"9" * 1_000_000) - def peakmem_load_from_zipstorage(self): with ZipStorage(self.zipfile.name) as storage: for i in range(ZIP_STORAGE_LOAD): diff --git a/doc/conf.py b/doc/conf.py index fdd819b93a..43623fcfc5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # sourmash documentation build configuration file, created by # sphinx-quickstart on Sat Jun 4 16:35:43 2016. @@ -17,57 +16,59 @@ import os import sourmash -print('sourmash at:', sourmash) + +print("sourmash at:", sourmash) # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, os.path.abspath("..")) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.coverage', - 'sphinx.ext.viewcode', -# 'sphinx.ext.napoleon', - 'nbsphinx', - 'IPython.sphinxext.ipython_console_highlighting', - 'myst_parser' + "sphinx.ext.autodoc", + "sphinx.ext.doctest", + "sphinx.ext.coverage", + "sphinx.ext.viewcode", + # 'sphinx.ext.napoleon', + "nbsphinx", + "IPython.sphinxext.ipython_console_highlighting", + "myst_parser", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. 
# You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] -source_suffix = ['.rst', '.md'] +source_suffix = [".rst", ".md"] # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'sourmash' -copyright = '2016-2023, C. Titus Brown, Luiz Irber, and N. Tessa Pierce-Ward' -author = 'C. Titus Brown, Luiz Irber, and N. Tessa Pierce-Ward' +project = "sourmash" +copyright = "2016-2023, C. Titus Brown, Luiz Irber, and N. Tessa Pierce-Ward" +author = "C. Titus Brown, Luiz Irber, and N. Tessa Pierce-Ward" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. from importlib.metadata import version -release = version('sourmash') -version = '.'.join(release.split('.')[:2]) + +release = version("sourmash") +version = ".".join(release.split(".")[:2]) # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -78,208 +79,208 @@ # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False # CTB: suppress warnings about circularity in ToC. # see https://github.com/sphinx-doc/sphinx/issues/7410. -suppress_warnings = ['toc.circular'] +suppress_warnings = ["toc.circular"] # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'alabaster' +html_theme = "alabaster" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. 
html_theme_options = { - 'logo': 'logo.png', - 'logo_name': True, - 'description': 'Quickly search, compare, and analyze genomic and metagenomic data sets', - 'sidebar_collapse': False, + "logo": "logo.png", + "logo_name": True, + "description": "Quickly search, compare, and analyze genomic and metagenomic data sets", + "sidebar_collapse": False, } # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. # " v documentation" by default. -#html_title = 'sourmash v1.0' +# html_title = 'sourmash v1.0' # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not None, a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. # The empty string is equivalent to '%b %d, %Y'. -#html_last_updated_fmt = None +# html_last_updated_fmt = None # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -html_sidebars = {'*': ['about.html', 'navigation.html', 'relations.html', - 'sourcelink.html', 'searchbox.html']} +html_sidebars = { + "*": [ + "about.html", + "navigation.html", + "relations.html", + "sourcelink.html", + "searchbox.html", + ] +} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). 
-#html_file_suffix = None +# html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' -#html_search_language = 'en' +# html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # 'ja' uses this config value. # 'zh' user can custom change `jieba` dictionary path. -#html_search_options = {'type': 'default'} +# html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' +# html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = 'sourmashdoc' +htmlhelp_basename = "sourmashdoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', + # Latex figure (float) alignment + #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'sourmash.tex', 'sourmash Documentation', - 'C. Titus Brown', 'manual'), + (master_doc, "sourmash.tex", "sourmash Documentation", "C. Titus Brown", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'sourmash', 'sourmash Documentation', - [author], 1) -] +man_pages = [(master_doc, "sourmash", "sourmash Documentation", [author], 1)] # If true, show URL addresses after external links. 
-#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -288,22 +289,28 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'sourmash', 'sourmash Documentation', - author, 'sourmash', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "sourmash", + "sourmash Documentation", + author, + "sourmash", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False autodoc_mock_imports = ["sourmash.minhash"] myst_heading_anchors = 3 diff --git a/pyproject.toml b/pyproject.toml index d1de447e72..3f2331b97c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,9 +100,11 @@ license = { text = "BSD 3-Clause License" } [project.optional-dependencies] test = [ "pytest>=6.2.4,<8.1.0", - "pytest-cov>=2.12,<5.0", - "pytest-xdist", + "pytest-cov>=4,<5.0", + "pytest-xdist>=3.1", "pyyaml>=6,<7", + "diff-cover>=7.3", + "covdefaults>=2.2.2", "recommonmark", "hypothesis", "build", @@ -155,6 +157,12 @@ macos-deployment-target = "10.14" [tool.maturin.target.aarch64-apple-darwin] macos-deployment-target = "11.0" +[tool.ruff.lint] +extend-select = [ + "UP", # pyupgrade +] +ignore = ["F401", "E712", "E402", "F821", "E722", "E741", "F811", "F403", "F822"] + [tool.isort] known_third_party = ["deprecation", "hypothesis", "mmh3", "numpy", "pytest", "screed", "sourmash_tst_utils"] multi_line_output = 3 @@ -212,3 +220,20 @@ testpaths = [ "tests", "doc", ] + +[tool.coverage] +html.show_contexts = true +html.skip_covered = false +paths.source = [ + "src", + ".tox*/*/lib/python*/site-packages", + ".tox*/pypy*/site-packages", + ".tox*\\*\\Lib\\site-packages", + "*/src", + "*\\src", + "*/tests", + "*\tests", +] +report.fail_under = 88 +run.parallel = true +run.plugins = ["covdefaults"] diff --git a/src/sourmash/__init__.py b/src/sourmash/__init__.py index 33170edcd8..53ee6e4803 100644 --- a/src/sourmash/__init__.py +++ b/src/sourmash/__init__.py @@ -18,17 +18,19 @@ class MinHash - hash sketch class from deprecation import deprecated from importlib.metadata import version -__all__ = ['MinHash', 'SourmashSignature', - 'load_one_signature', - 'SourmashSignature', - 'load_file_as_index', - 'load_file_as_signatures', - 'save_signatures', - 'create_sbt_index', - 'load_signatures', # deprecated - remove in 5.0 - 'load_sbt_index', # deprecated - remove in 5.0 - 'search_sbt_index', # deprecated - remove in 5.0 - ] +__all__ = [ + "MinHash", + "SourmashSignature", + "load_one_signature", + "SourmashSignature", + "load_file_as_index", + "load_file_as_signatures", + "save_signatures", + "create_sbt_index", + "load_signatures", # deprecated - remove in 5.0 + "load_sbt_index", # deprecated - remove in 5.0 + "search_sbt_index", # deprecated - remove in 5.0 +] from ._lowlevel import ffi, lib @@ -48,9 +50,13 @@ class MinHash - hash sketch class save_signatures, ) -@deprecated(deprecated_in="3.5.1", removed_in="5.0", - current_version=VERSION, - details='Use load_file_as_signatures instead.') + 
+@deprecated( + deprecated_in="3.5.1", + removed_in="5.0", + current_version=VERSION, + details="Use load_file_as_signatures instead.", +) def load_signatures(*args, **kwargs): """Load a JSON string with signatures into classes. @@ -65,12 +71,17 @@ def load_signatures(*args, **kwargs): """ return load_signatures_private(*args, **kwargs) + from .sbtmh import load_sbt_index as load_sbt_index_private from .sbtmh import search_sbt_index as search_sbt_index_private -@deprecated(deprecated_in="3.5.1", removed_in="5.0", - current_version=VERSION, - details='Use load_file_as_index instead.') + +@deprecated( + deprecated_in="3.5.1", + removed_in="5.0", + current_version=VERSION, + details="Use load_file_as_index instead.", +) def load_sbt_index(*args, **kwargs): """Load and return an SBT index. @@ -80,9 +91,12 @@ def load_sbt_index(*args, **kwargs): return load_sbt_index_private(*args, **kwargs) -@deprecated(deprecated_in="3.5.1", removed_in="5.0", - current_version=VERSION, - details='Use the new Index API instead.') +@deprecated( + deprecated_in="3.5.1", + removed_in="5.0", + current_version=VERSION, + details="Use the new Index API instead.", +) def search_sbt_index(*args, **kwargs): """\ Search an SBT index `tree` with signature `query` for matches above @@ -98,6 +112,7 @@ def search_sbt_index(*args, **kwargs): """ return search_sbt_index_private(*args, **kwargs) + from .sbtmh import create_sbt_index from . import lca from . import tax diff --git a/src/sourmash/__main__.py b/src/sourmash/__main__.py index 74fdf270c0..a8c70878fa 100644 --- a/src/sourmash/__main__.py +++ b/src/sourmash/__main__.py @@ -7,18 +7,19 @@ def main(arglist=None): import sourmash + args = sourmash.cli.parse_args(arglist) - if hasattr(args, 'subcmd'): + if hasattr(args, "subcmd"): mod = getattr(sourmash.cli, args.cmd) submod = getattr(mod, args.subcmd) - mainmethod = getattr(submod, 'main') + mainmethod = getattr(submod, "main") else: mod = getattr(sourmash.cli, args.cmd) - mainmethod = getattr(mod, 'main') + mainmethod = getattr(mod, "main") retval = mainmethod(args) raise SystemExit(retval) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/sourmash/cli/__init__.py b/src/sourmash/cli/__init__.py index 575bbdb0f5..a02487f4fd 100644 --- a/src/sourmash/cli/__init__.py +++ b/src/sourmash/cli/__init__.py @@ -45,7 +45,7 @@ class SourmashParser(ArgumentParser): _citation_printed = False def __init__(self, citation=True, **kwargs): - super(SourmashParser, self).__init__(**kwargs) + super().__init__(**kwargs) self.citation = citation @classmethod @@ -53,6 +53,7 @@ def print_citation(cls): if cls._citation_printed: return from sourmash.logging import notify + notify(f"\n== This is sourmash version {sourmash.VERSION}. ==") notify("== Please cite Brown and Irber (2016), doi:10.21105/joss.00027. 
==\n") cls._citation_printed = True @@ -70,53 +71,56 @@ def _subparser_from_name(self, name): def print_help(self): self.print_citation() - super(SourmashParser, self).print_help() - + super().print_help() def parse_args(self, args=None, namespace=None): - if (args is None and len(sys.argv) == 1) or (args is not None and len(args) == 0): + if (args is None and len(sys.argv) == 1) or ( + args is not None and len(args) == 0 + ): self.print_help() raise SystemExit(1) - args = super(SourmashParser, self).parse_args(args=args, namespace=namespace) - if ('quiet' not in args or not args.quiet) and self.citation: + args = super().parse_args(args=args, namespace=namespace) + if ("quiet" not in args or not args.quiet) and self.citation: self.print_citation() - if 'subcmd' in args and args.subcmd is None: + if "subcmd" in args and args.subcmd is None: self._subparser_from_name(args.cmd).print_help() raise SystemExit(1) # BEGIN: dirty hacks to simultaneously support new and previous interface - if hasattr(args, 'subcmd') and args.subcmd == 'import': - args.subcmd = 'ingest' + if hasattr(args, "subcmd") and args.subcmd == "import": + args.subcmd = "ingest" # END: dirty hacks to simultaneously support new and previous interface return args def get_parser(): module_descs = { - 'tax': 'Integrate taxonomy information based on "gather" results', - 'lca': 'Taxonomic operations', - 'sketch': 'Create signatures', - 'sig': 'Manipulate signature files', - 'storage': 'Operations on storage', - 'scripts': "Plug-ins", + "tax": 'Integrate taxonomy information based on "gather" results', + "lca": "Taxonomic operations", + "sketch": "Create signatures", + "sig": "Manipulate signature files", + "storage": "Operations on storage", + "scripts": "Plug-ins", } alias = { "sig": "signature", "ext": "scripts", } - expert = set(['categorize', 'import_csv', 'migrate', 'multigather', 'sbt_combine', 'watch']) + expert = set( + ["categorize", "import_csv", "migrate", "multigather", "sbt_combine", "watch"] + ) clidir = os.path.dirname(__file__) basic_ops = utils.command_list(clidir) # provide a list of the basic operations - not expert, not submodules. user_ops = [op for op in basic_ops if op not in expert and op not in module_descs] - usage = ' Basic operations\n' + usage = " Basic operations\n" for op in user_ops: docstring = getattr(sys.modules[__name__], op).__doc__ - helpstring = 'sourmash {op:s} --help'.format(op=op) - usage += ' {hs:25s} {ds:s}\n'.format(hs=helpstring, ds=docstring) + helpstring = f"sourmash {op:s} --help" + usage += f" {helpstring:25s} {docstring:s}\n" # next, all the subcommand ones - dive into subdirectories. 
cmd_group_dirs = next(os.walk(clidir))[1] cmd_group_dirs = filter(utils.opfilter, cmd_group_dirs) @@ -124,18 +128,33 @@ def get_parser(): cmd_group_usage = [cmd for cmd in cmd_group_dirs if cmd not in alias.values()] for dirpath in cmd_group_usage: - usage += '\n ' + module_descs[dirpath] + '\n' - usage += ' sourmash {gd:s} --help\n'.format(gd=dirpath) + usage += "\n " + module_descs[dirpath] + "\n" + usage += f" sourmash {dirpath:s} --help\n" if dirpath in alias: - usage += ' sourmash {gd:s} --help\n'.format(gd=alias[dirpath]) + usage += f" sourmash {alias[dirpath]:s} --help\n" - desc = 'Create, compare, and manipulate k-mer sketches of biological sequences.\n\nUsage instructions:\n' + usage - parser = SourmashParser(prog='sourmash', description=desc, formatter_class=RawDescriptionHelpFormatter, usage=SUPPRESS) - parser._optionals.title = 'Options' - parser.add_argument('-v', '--version', action='version', version='sourmash '+ sourmash.VERSION) - parser.add_argument('-q', '--quiet', action='store_true', help='don\'t print citation information') + desc = ( + "Create, compare, and manipulate k-mer sketches of biological sequences.\n\nUsage instructions:\n" + + usage + ) + parser = SourmashParser( + prog="sourmash", + description=desc, + formatter_class=RawDescriptionHelpFormatter, + usage=SUPPRESS, + ) + parser._optionals.title = "Options" + parser.add_argument( + "-v", "--version", action="version", version="sourmash " + sourmash.VERSION + ) + parser.add_argument( + "-q", "--quiet", action="store_true", help="don't print citation information" + ) sub = parser.add_subparsers( - title='Instructions', dest='cmd', metavar='cmd', help=SUPPRESS, + title="Instructions", + dest="cmd", + metavar="cmd", + help=SUPPRESS, ) for op in basic_ops + cmd_group_dirs: getattr(sys.modules[__name__], op).subparser(sub) diff --git a/src/sourmash/cli/categorize.py b/src/sourmash/cli/categorize.py index e3c41ec773..0c8002e224 100644 --- a/src/sourmash/cli/categorize.py +++ b/src/sourmash/cli/categorize.py @@ -4,32 +4,36 @@ def subparser(subparsers): - subparser = subparsers.add_parser('categorize') - subparser.add_argument('database', help='location of signature collection/database to load') + subparser = subparsers.add_parser("categorize") subparser.add_argument( - 'queries', nargs='+', - help='locations of signatures to categorize' + "database", help="location of signature collection/database to load" ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "queries", nargs="+", help="locations of signatures to categorize" + ) + subparser.add_argument( + "-q", "--quiet", action="store_true", help="suppress non-error output" ) add_ksize_arg(subparser) subparser.add_argument( - '--threshold', default=0.08, type=float, - help='minimum threshold for reporting matches; default=0.08' + "--threshold", + default=0.08, + type=float, + help="minimum threshold for reporting matches; default=0.08", ) subparser.add_argument( - '--ignore-abundance', action='store_true', - help='do NOT use k-mer abundances if present' + "--ignore-abundance", + action="store_true", + help="do NOT use k-mer abundances if present", ) add_moltype_args(subparser) # TODO: help messages in these - subparser.add_argument('--csv', help='output summary CSV to this file') - subparser.add_argument('--load-csv', default=None) + subparser.add_argument("--csv", help="output summary CSV to this file") + subparser.add_argument("--load-csv", default=None) def main(args): import sourmash + return 
sourmash.commands.categorize(args) diff --git a/src/sourmash/cli/compare.py b/src/sourmash/cli/compare.py index 54864d6c93..74da5bd837 100644 --- a/src/sourmash/cli/compare.py +++ b/src/sourmash/cli/compare.py @@ -1,6 +1,6 @@ """create a similarity matrix comparing many samples""" -usage=""" +usage = """ The `compare` subcommand compares one or more signatures (created with `sketch`) using estimated Jaccard index [1] or (if signatures are @@ -27,69 +27,91 @@ --- """ -from sourmash.cli.utils import (add_ksize_arg, add_moltype_args, - add_picklist_args, add_pattern_args, - add_scaled_arg) +from sourmash.cli.utils import ( + add_ksize_arg, + add_moltype_args, + add_picklist_args, + add_pattern_args, + add_scaled_arg, +) def subparser(subparsers): - subparser = subparsers.add_parser('compare', description=__doc__, usage=usage) + subparser = subparsers.add_parser("compare", description=__doc__, usage=usage) subparser.add_argument( - 'signatures', nargs='*', help='list of signatures to compare', - default=[] + "signatures", nargs="*", help="list of signatures to compare", default=[] ) subparser.add_argument( - '-q', '--quiet', action='store_true', help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-o', '--output', metavar='F', - help='file to which output will be written; default is terminal ' - '(standard output)' + "-o", + "--output", + metavar="F", + help="file to which output will be written; default is terminal " + "(standard output)", ) subparser.add_argument( - '--ignore-abundance', action='store_true', - help='do NOT use k-mer abundances even if present' + "--ignore-abundance", + action="store_true", + help="do NOT use k-mer abundances even if present", ) subparser.add_argument( - '--containment', action='store_true', - help='calculate containment instead of similarity' + "--containment", + action="store_true", + help="calculate containment instead of similarity", ) subparser.add_argument( - '--max-containment', action='store_true', - help='calculate max containment instead of similarity' + "--max-containment", + action="store_true", + help="calculate max containment instead of similarity", ) subparser.add_argument( - '--avg-containment', '--average-containment', action='store_true', - help='calculate average containment instead of similarity' + "--avg-containment", + "--average-containment", + action="store_true", + help="calculate average containment instead of similarity", ) subparser.add_argument( - '--estimate-ani', '--ANI', '--ani', action='store_true', - help='return ANI estimated from jaccard, containment, average containment, or max containment; see https://doi.org/10.1101/2022.01.11.475870' + "--estimate-ani", + "--ANI", + "--ani", + action="store_true", + help="return ANI estimated from jaccard, containment, average containment, or max containment; see https://doi.org/10.1101/2022.01.11.475870", ) subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='continue past errors in file loading' + "-f", + "--force", + action="store_true", + help="continue past errors in file loading", ) subparser.add_argument( - '--csv', metavar='F', - help='write matrix to specified file in CSV format (with column ' - 'headers)' + "--csv", + metavar="F", + help="write matrix to specified file in CSV 
format (with column " "headers)", ) subparser.add_argument( - '-p', '--processes', metavar='N', type=int, default=None, - help='Number of processes to use to calculate similarity') + "-p", + "--processes", + metavar="N", + type=int, + default=None, + help="Number of processes to use to calculate similarity", + ) subparser.add_argument( - '--distance-matrix', action='store_true', - help='output a distance matrix, instead of a similarity matrix' + "--distance-matrix", + action="store_true", + help="output a distance matrix, instead of a similarity matrix", ) subparser.add_argument( - '--similarity-matrix', action='store_false', - dest='distance_matrix', - help='output a similarity matrix; this is the default', + "--similarity-matrix", + action="store_false", + dest="distance_matrix", + help="output a similarity matrix; this is the default", ) add_ksize_arg(subparser) @@ -101,4 +123,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.commands.compare(args) diff --git a/src/sourmash/cli/compute.py b/src/sourmash/cli/compute.py index 7b3b48d20d..cfdb48f42a 100644 --- a/src/sourmash/cli/compute.py +++ b/src/sourmash/cli/compute.py @@ -1,6 +1,6 @@ """compute sequence signatures for inputs""" -usage=""" +usage = """ ** WARNING: the sourmash compute command is DEPRECATED as of 4.0 and ** will be removed in 5.0. Please see the 'sourmash sketch' command instead. @@ -35,8 +35,8 @@ def ksize_parser(ksizes): # get list of k-mer sizes for which to compute sketches - if ',' in ksizes: - ksizes = ksizes.split(',') + if "," in ksizes: + ksizes = ksizes.split(",") ksizes = list(map(int, ksizes)) else: ksizes = [int(ksizes)] @@ -45,81 +45,98 @@ def ksize_parser(ksizes): def subparser(subparsers): - subparser = subparsers.add_parser('compute', description=__doc__, usage=usage) + subparser = subparsers.add_parser("compute", description=__doc__, usage=usage) - sketch_args = subparser.add_argument_group('Sketching options') + sketch_args = subparser.add_argument_group("Sketching options") sketch_args.add_argument( - '-k', '--ksizes', default='21,31,51', + "-k", + "--ksizes", + default="21,31,51", type=ksize_parser, - help='comma-separated list of k-mer sizes; default=%(default)s' + help="comma-separated list of k-mer sizes; default=%(default)s", ) sketch_args.add_argument( - '--track-abundance', action='store_true', - help='track k-mer abundances in the generated signature' + "--track-abundance", + action="store_true", + help="track k-mer abundances in the generated signature", ) sketch_args.add_argument( - '--scaled', type=float, default=0, - help='choose number of hashes as 1 in FRACTION of input k-mers' + "--scaled", + type=float, + default=0, + help="choose number of hashes as 1 in FRACTION of input k-mers", ) add_construct_moltype_args(sketch_args) sketch_args.add_argument( - '--input-is-protein', action='store_true', - help='Consume protein sequences - no translation needed.' 
+ "--input-is-protein", + action="store_true", + help="Consume protein sequences - no translation needed.", ) sketch_args.add_argument( - '--seed', type=int, default=get_minhash_default_seed(), - help='seed used by MurmurHash; default=%(default)i' + "--seed", + type=int, + default=get_minhash_default_seed(), + help="seed used by MurmurHash; default=%(default)i", ) - file_args = subparser.add_argument_group('File handling options') + file_args = subparser.add_argument_group("File handling options") file_args.add_argument( - '-f', '--force', action='store_true', - help='recompute signatures even if the file exists' + "-f", + "--force", + action="store_true", + help="recompute signatures even if the file exists", ) file_args.add_argument( - '-o', '--output', - help='output computed signatures to this file' + "-o", "--output", help="output computed signatures to this file" ) file_args.add_argument( - '--output-dir', '--outdir', - help='output computed signatures to this directory', + "--output-dir", + "--outdir", + help="output computed signatures to this directory", ) file_args.add_argument( - '--singleton', action='store_true', - help='compute a signature for each sequence record individually' + "--singleton", + action="store_true", + help="compute a signature for each sequence record individually", ) file_args.add_argument( - '--merge', '--name', type=str, default='', metavar="FILE", - help='merge all input files into one signature file with the ' - 'specified name' + "--merge", + "--name", + type=str, + default="", + metavar="FILE", + help="merge all input files into one signature file with the " "specified name", ) file_args.add_argument( - '--name-from-first', action='store_true', - help='name the signature generated from each file after the first ' - 'record in the file' + "--name-from-first", + action="store_true", + help="name the signature generated from each file after the first " + "record in the file", ) file_args.add_argument( - '--randomize', action='store_true', - help='shuffle the list of input filenames randomly' + "--randomize", + action="store_true", + help="shuffle the list of input filenames randomly", ) subparser.add_argument( - '-q', '--quiet', action='store_true', help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '--check-sequence', action='store_true', - help='complain if input sequence is invalid' + "--check-sequence", + action="store_true", + help="complain if input sequence is invalid", ) subparser.add_argument( - '--license', default='CC0', type=str, - help='signature license. Currently only CC0 is supported.' + "--license", + default="CC0", + type=str, + help="signature license. Currently only CC0 is supported.", ) - subparser.add_argument( - 'filenames', nargs='+', help='file(s) of sequences' - ) - subparser._positionals.title = 'Required arguments' - subparser._optionals.title = 'Miscellaneous options' + subparser.add_argument("filenames", nargs="+", help="file(s) of sequences") + subparser._positionals.title = "Required arguments" + subparser._optionals.title = "Miscellaneous options" add_num_arg(sketch_args, 500) @@ -127,8 +144,10 @@ def main(args): from sourmash.command_compute import compute from sourmash.logging import notify - notify("""\ + notify( + """\ ** WARNING: the sourmash compute command is DEPRECATED as of 4.0 and ** will be removed in 5.0. Please see the 'sourmash sketch' command instead. 
-""") +""" + ) return compute(args) diff --git a/src/sourmash/cli/gather.py b/src/sourmash/cli/gather.py index 0b0115efd2..88860a50cd 100644 --- a/src/sourmash/cli/gather.py +++ b/src/sourmash/cli/gather.py @@ -1,6 +1,6 @@ """search a metagenome signature against dbs""" -usage=""" +usage = """ The `gather` subcommand selects the best reference genomes to use for a metagenome analysis, by finding the smallest set of non-overlapping @@ -62,103 +62,133 @@ --- """ -from sourmash.cli.utils import (add_ksize_arg, add_moltype_args, - add_picklist_args, add_scaled_arg, - add_pattern_args) +from sourmash.cli.utils import ( + add_ksize_arg, + add_moltype_args, + add_picklist_args, + add_scaled_arg, + add_pattern_args, +) def subparser(subparsers): - subparser = subparsers.add_parser('gather', description=__doc__, usage=usage) - subparser.add_argument('query', help='query signature') + subparser = subparsers.add_parser("gather", description=__doc__, usage=usage) + subparser.add_argument("query", help="query signature") subparser.add_argument( - 'databases', nargs='+', - help='signatures/SBTs to search', + "databases", + nargs="+", + help="signatures/SBTs to search", ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) + subparser.add_argument("-d", "--debug", action="store_true") subparser.add_argument( - '-d', '--debug', action='store_true' + "-n", + "--num-results", + default=None, + type=int, + metavar="N", + help="number of results to report (default: terminate at --threshold-bp)", ) subparser.add_argument( - '-n', '--num-results', default=None, type=int, metavar='N', - help='number of results to report (default: terminate at --threshold-bp)' + "-o", + "--output", + metavar="FILE", + help="output CSV containing matches to this file", ) subparser.add_argument( - '-o', '--output', metavar='FILE', - help='output CSV containing matches to this file' + "--save-matches", + metavar="FILE", + help="save gather matched signatures from the database to the " + "specified file", ) subparser.add_argument( - '--save-matches', metavar='FILE', - help='save gather matched signatures from the database to the ' - 'specified file' + "--save-prefetch", + metavar="FILE", + help="save all prefetch-matched signatures from the databases to the " + "specified file or directory", ) subparser.add_argument( - '--save-prefetch', metavar='FILE', - help='save all prefetch-matched signatures from the databases to the ' - 'specified file or directory' + "--save-prefetch-csv", + metavar="FILE", + help="save a csv with information from all prefetch-matched signatures " + "to the specified file", ) subparser.add_argument( - '--save-prefetch-csv', metavar='FILE', - help='save a csv with information from all prefetch-matched signatures ' - 'to the specified file' + "--threshold-bp", + metavar="REAL", + type=float, + default=5e4, + help="reporting threshold (in bp) for estimated overlap with remaining query (default=50kb)", ) subparser.add_argument( - '--threshold-bp', metavar='REAL', type=float, default=5e4, - help='reporting threshold (in bp) for estimated overlap with remaining query (default=50kb)' + "--output-unassigned", + metavar="FILE", + help="output unassigned portions of the query as a signature to the " + "specified file", ) subparser.add_argument( - '--output-unassigned', metavar='FILE', - help='output unassigned portions of the query as a signature to the ' - 'specified file' + "--ignore-abundance", + 
action="store_true", + help="do NOT use k-mer abundances if present", ) subparser.add_argument( - '--ignore-abundance', action='store_true', - help='do NOT use k-mer abundances if present' + "--md5", default=None, help="select the signature with this md5 as query" ) subparser.add_argument( - '--md5', default=None, - help='select the signature with this md5 as query' - ) - subparser.add_argument( - '--cache-size', default=0, type=int, metavar='N', - help='number of internal SBT nodes to cache in memory (default: 0, cache all nodes)' + "--cache-size", + default=0, + type=int, + metavar="N", + help="number of internal SBT nodes to cache in memory (default: 0, cache all nodes)", ) # advanced parameters subparser.add_argument( - '--linear', dest="linear", action='store_true', + "--linear", + dest="linear", + action="store_true", help="force a low-memory but maybe slower database search", ) subparser.add_argument( - '--no-linear', dest="linear", action='store_false', + "--no-linear", + dest="linear", + action="store_false", ) subparser.add_argument( - '--no-prefetch', dest="prefetch", action='store_false', + "--no-prefetch", + dest="prefetch", + action="store_false", help="do not use prefetch before gather; see documentation", ) subparser.add_argument( - '--prefetch', dest="prefetch", action='store_true', + "--prefetch", + dest="prefetch", + action="store_true", help="use prefetch before gather; see documentation", ) subparser.add_argument( - '--estimate-ani-ci', action='store_true', - help='also output confidence intervals for ANI estimates' + "--estimate-ani-ci", + action="store_true", + help="also output confidence intervals for ANI estimates", ) subparser.add_argument( - '--fail-on-empty-database', action='store_true', - help='stop at databases that contain no compatible signatures' + "--fail-on-empty-database", + action="store_true", + help="stop at databases that contain no compatible signatures", ) subparser.add_argument( - '--no-fail-on-empty-database', action='store_false', - dest='fail_on_empty_database', - help='continue past databases that contain no compatible signatures' + "--no-fail-on-empty-database", + action="store_false", + dest="fail_on_empty_database", + help="continue past databases that contain no compatible signatures", ) subparser.set_defaults(fail_on_empty_database=True) subparser.add_argument( - '--create-empty-results', action='store_true', - help='create an empty results file even if no matches.' 
+ "--create-empty-results", + action="store_true", + help="create an empty results file even if no matches.", ) add_ksize_arg(subparser) @@ -170,4 +200,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.commands.gather(args) diff --git a/src/sourmash/cli/import_csv.py b/src/sourmash/cli/import_csv.py index 77fcbd14f8..6e0964678a 100644 --- a/src/sourmash/cli/import_csv.py +++ b/src/sourmash/cli/import_csv.py @@ -4,17 +4,19 @@ def subparser(subparsers): - subparser = subparsers.add_parser('import_csv') - subparser.add_argument('mash_csvfile', help='CSV file with mash sketches') + subparser = subparsers.add_parser("import_csv") + subparser.add_argument("mash_csvfile", help="CSV file with mash sketches") subparser.add_argument( - '-o', '--output', - help='save signature generated from data to this file (default stdout)' + "-o", + "--output", + help="save signature generated from data to this file (default stdout)", ) def main(args): import sourmash + notify("** WARNING: 'import_csv' is deprecated as of sourmash 4.0, and will") notify("** be removed in sourmash 5.0; use 'sourmash sig import --csv' instead.") - notify('') + notify("") return sourmash.commands.import_csv(args) diff --git a/src/sourmash/cli/index.py b/src/sourmash/cli/index.py index dcd8572ca0..4fb0fc7ab8 100644 --- a/src/sourmash/cli/index.py +++ b/src/sourmash/cli/index.py @@ -1,6 +1,6 @@ """index signatures for rapid search""" -usage=""" +usage = """ sourmash index -k 31 dbname *.sig @@ -25,46 +25,63 @@ --- """ -from sourmash.cli.utils import (add_ksize_arg, add_moltype_args, - add_picklist_args, add_scaled_arg) +from sourmash.cli.utils import ( + add_ksize_arg, + add_moltype_args, + add_picklist_args, + add_scaled_arg, +) def subparser(subparsers): - subparser = subparsers.add_parser('index', description=__doc__, - usage=usage) - subparser.add_argument('sbt_name', help='name to save index into; .sbt.zip or .sbt.json file') + subparser = subparsers.add_parser("index", description=__doc__, usage=usage) subparser.add_argument( - 'signatures', nargs='*', - help='signatures to load into SBT' + "sbt_name", help="name to save index into; .sbt.zip or .sbt.json file" ) + subparser.add_argument("signatures", nargs="*", help="signatures to load into SBT") subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-d', '--n_children', metavar='D', type=int, default=2, - help='number of children for internal nodes; default=2' + "-d", + "--n_children", + metavar="D", + type=int, + default=2, + help="number of children for internal nodes; default=2", ) subparser.add_argument( - '--append', action='store_true', default=False, - help='add signatures to an existing SBT' + "--append", + action="store_true", + default=False, + help="add signatures to an existing SBT", ) subparser.add_argument( - '-x', '--bf-size', metavar='S', type=float, default=1e5, - help='Bloom filter size used for internal nodes' + "-x", + "--bf-size", + metavar="S", + type=float, + default=1e5, + help="Bloom filter size used for internal nodes", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try loading *all* files in provided subdirectories, not just .sig files"' + "-f", + 
"--force", + action="store_true", + help='try loading *all* files in provided subdirectories, not just .sig files"', ) subparser.add_argument( - '-s', '--sparseness', metavar='FLOAT', type=float, default=.0, - help='What percentage of internal nodes will not be saved; ranges ' - 'from 0.0 (save all nodes) to 1.0 (no nodes saved)' + "-s", + "--sparseness", + metavar="FLOAT", + type=float, + default=0.0, + help="What percentage of internal nodes will not be saved; ranges " + "from 0.0 (save all nodes) to 1.0 (no nodes saved)", ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -74,4 +91,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.commands.index(args) diff --git a/src/sourmash/cli/info.py b/src/sourmash/cli/info.py index b607112b7c..5d79790389 100644 --- a/src/sourmash/cli/info.py +++ b/src/sourmash/cli/info.py @@ -6,26 +6,29 @@ from sourmash.logging import notify from sourmash.plugins import list_all_plugins + def subparser(subparsers): - subparser = subparsers.add_parser('info') + subparser = subparsers.add_parser("info") subparser.add_argument( - '-v', '--verbose', action='store_true', - help='report versions of khmer and screed' + "-v", + "--verbose", + action="store_true", + help="report versions of khmer and screed", ) def info(verbose=False): "Report sourmash version + version of installed dependencies." - notify(f'sourmash version {sourmash.VERSION}') - notify(f'- loaded from path: {os.path.dirname(__file__)}') - notify('') + notify(f"sourmash version {sourmash.VERSION}") + notify(f"- loaded from path: {os.path.dirname(__file__)}") + notify("") if verbose: - notify('khmer version: None (internal Nodegraph)') - notify('') + notify("khmer version: None (internal Nodegraph)") + notify("") - notify(f'screed version {screed.__version__}') - notify(f'- loaded from path: {os.path.dirname(screed.__file__)}') + notify(f"screed version {screed.__version__}") + notify(f"- loaded from path: {os.path.dirname(screed.__file__)}") list_all_plugins() diff --git a/src/sourmash/cli/lca/__init__.py b/src/sourmash/cli/lca/__init__.py index a403876d02..6fbb73619c 100644 --- a/src/sourmash/cli/lca/__init__.py +++ b/src/sourmash/cli/lca/__init__.py @@ -16,19 +16,24 @@ def subparser(subparsers): - subparser = subparsers.add_parser('lca', formatter_class=RawDescriptionHelpFormatter, usage=SUPPRESS) - desc = 'Operations\n' + subparser = subparsers.add_parser( + "lca", formatter_class=RawDescriptionHelpFormatter, usage=SUPPRESS + ) + desc = "Operations\n" clidir = os.path.dirname(__file__) ops = command_list(clidir) for subcmd in ops: docstring = getattr(sys.modules[__name__], subcmd).__doc__ - helpstring = 'sourmash lca {op:s} --help'.format(op=subcmd) - desc += ' {hs:33s} {ds:s}\n'.format(hs=helpstring, ds=docstring) + helpstring = f"sourmash lca {subcmd:s} --help" + desc += f" {helpstring:33s} {docstring:s}\n" s = subparser.add_subparsers( - title='Taxonomic utilities', dest='subcmd', metavar='subcmd', help=SUPPRESS, - description=desc + title="Taxonomic utilities", + dest="subcmd", + metavar="subcmd", + help=SUPPRESS, + description=desc, ) for subcmd in ops: getattr(sys.modules[__name__], subcmd).subparser(s) subparser._action_groups.reverse() - subparser._optionals.title = 'Options' + subparser._optionals.title = "Options" diff --git a/src/sourmash/cli/lca/classify.py b/src/sourmash/cli/lca/classify.py index 7efe112bd8..55c6134f07 100644 --- a/src/sourmash/cli/lca/classify.py +++ b/src/sourmash/cli/lca/classify.py @@ -2,34 +2,49 @@ def subparser(subparsers): - 
subparser = subparsers.add_parser('classify') - subparser.add_argument('--db', nargs='+', action='append', - help='databases to use to classify') - subparser.add_argument('--query', nargs='*', default=[], action='append', - help='query signatures to classify') - subparser.add_argument('--query-from-file', - help='file containing list of signature files to query') - subparser.add_argument('--threshold', metavar='T', type=int, default=5, - help="minimum number of hashes needed for a taxonomic classification (default: 5)") + subparser = subparsers.add_parser("classify") subparser.add_argument( - '--majority', action='store_true', - help='use majority vote classification instead of lca' + "--db", nargs="+", action="append", help="databases to use to classify" ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "--query", + nargs="*", + default=[], + action="append", + help="query signatures to classify", ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='output debugging output' + "--query-from-file", help="file containing list of signature files to query" ) subparser.add_argument( - '-o', '--output', metavar='FILE', default='-', - help='output CSV to the specified file; by default output to stdout' + "--threshold", + metavar="T", + type=int, + default=5, + help="minimum number of hashes needed for a taxonomic classification (default: 5)", ) - subparser.add_argument('--scaled', type=float) + subparser.add_argument( + "--majority", + action="store_true", + help="use majority vote classification instead of lca", + ) + subparser.add_argument( + "-q", "--quiet", action="store_true", help="suppress non-error output" + ) + subparser.add_argument( + "-d", "--debug", action="store_true", help="output debugging output" + ) + subparser.add_argument( + "-o", + "--output", + metavar="FILE", + default="-", + help="output CSV to the specified file; by default output to stdout", + ) + subparser.add_argument("--scaled", type=float) def main(args): import sourmash + return sourmash.lca.command_classify.classify(args) diff --git a/src/sourmash/cli/lca/compare_csv.py b/src/sourmash/cli/lca/compare_csv.py index 1f62fe4aa0..6732940325 100644 --- a/src/sourmash/cli/lca/compare_csv.py +++ b/src/sourmash/cli/lca/compare_csv.py @@ -1,35 +1,41 @@ """compare spreadsheets""" + def subparser(subparsers): # Dirty hack to simultaneously support new and previous interface # If desired, this function can be removed with a major version bump. 
- for cmd in ('compare', 'compare_csv'): + for cmd in ("compare", "compare_csv"): subparser = subparsers.add_parser(cmd) - subparser.add_argument('csv1', help='taxonomy spreadsheet output by classify') - subparser.add_argument('csv2', help='custom taxonomy spreadsheet') + subparser.add_argument("csv1", help="taxonomy spreadsheet output by classify") + subparser.add_argument("csv2", help="custom taxonomy spreadsheet") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='output debugging output' + "-d", "--debug", action="store_true", help="output debugging output" ) subparser.add_argument( - '-C', '--start-column', metavar='C', default=2, type=int, - help='column at which taxonomic assignments start; default=2' + "-C", + "--start-column", + metavar="C", + default=2, + type=int, + help="column at which taxonomic assignments start; default=2", ) subparser.add_argument( - '--tabs', action='store_true', - help='input spreadsheet is tab-delimited; default is commas' + "--tabs", + action="store_true", + help="input spreadsheet is tab-delimited; default is commas", ) subparser.add_argument( - '--no-headers', action='store_true', - help='no headers present in taxonomy spreadsheet' + "--no-headers", + action="store_true", + help="no headers present in taxonomy spreadsheet", ) - subparser.add_argument('-f', '--force', action='store_true') + subparser.add_argument("-f", "--force", action="store_true") def main(args): import sourmash + return sourmash.lca.command_compare_csv.compare_csv(args) diff --git a/src/sourmash/cli/lca/index.py b/src/sourmash/cli/lca/index.py index 3e1e456273..afc0702e9f 100644 --- a/src/sourmash/cli/lca/index.py +++ b/src/sourmash/cli/lca/index.py @@ -1,69 +1,74 @@ """create LCA database""" -from sourmash.cli.utils import (add_ksize_arg, add_moltype_args, - add_picklist_args) +from sourmash.cli.utils import add_ksize_arg, add_moltype_args, add_picklist_args def subparser(subparsers): - subparser = subparsers.add_parser('index') - subparser.add_argument('csv', help='taxonomy spreadsheet') - subparser.add_argument('lca_db_out', help='output database name') + subparser = subparsers.add_parser("index") + subparser.add_argument("csv", help="taxonomy spreadsheet") + subparser.add_argument("lca_db_out", help="output database name") subparser.add_argument( - 'signatures', nargs='*', - help='signatures or directory of signatures to index (optional if provided via --from-file)' + "signatures", + nargs="*", + help="signatures or directory of signatures to index (optional if provided via --from-file)", ) subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) + subparser.add_argument("--scaled", metavar="S", default=10000, type=float) subparser.add_argument( - '--scaled', metavar='S', default=10000, type=float + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-d", "--debug", action="store_true", help="output debugging output" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='output debugging output' + "-C", + "--start-column", + metavar="C", + default=2, + type=int, + help="column at which taxonomic 
assignments start; default=2", ) subparser.add_argument( - '-C', '--start-column', metavar='C', default=2, type=int, - help='column at which taxonomic assignments start; default=2' + "--tabs", + action="store_true", + help="input spreadsheet is tab-delimited; default is commas", ) subparser.add_argument( - '--tabs', action='store_true', - help='input spreadsheet is tab-delimited; default is commas' + "--no-headers", + action="store_true", + help="no headers present in taxonomy spreadsheet", ) subparser.add_argument( - '--no-headers', action='store_true', - help='no headers present in taxonomy spreadsheet' + "--split-identifiers", + action="store_true", + help="split names in signatures on whitespace", ) subparser.add_argument( - '--split-identifiers', action='store_true', - help='split names in signatures on whitespace' + "--keep-identifier-versions", + action="store_true", + help="do not remove accession versions", ) + subparser.add_argument("-f", "--force", action="store_true") + subparser.add_argument("--report", help="output a report on anomalies, if any") subparser.add_argument( - '--keep-identifier-versions', action='store_true', - help='do not remove accession versions' + "--require-taxonomy", + action="store_true", + help="ignore signatures with no taxonomy entry", ) - subparser.add_argument('-f', '--force', action='store_true') subparser.add_argument( - '--report', help='output a report on anomalies, if any' + "--fail-on-missing-taxonomy", + action="store_true", + help="fail quickly if taxonomy is not available for an identifier", ) subparser.add_argument( - '--require-taxonomy', action='store_true', - help='ignore signatures with no taxonomy entry' - ) - subparser.add_argument( - '--fail-on-missing-taxonomy', action='store_true', - help='fail quickly if taxonomy is not available for an identifier', - ) - subparser.add_argument( - '-F', '--database-format', + "-F", + "--database-format", help="format of output database; default is 'json'", - default='json', - choices=['json', 'sql'], + default="json", + choices=["json", "sql"], ) add_ksize_arg(subparser, default=31) @@ -73,4 +78,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.lca.command_index.index(args) diff --git a/src/sourmash/cli/lca/rankinfo.py b/src/sourmash/cli/lca/rankinfo.py index 6108dcdf4f..5d89612942 100644 --- a/src/sourmash/cli/lca/rankinfo.py +++ b/src/sourmash/cli/lca/rankinfo.py @@ -1,23 +1,25 @@ """database rank info""" + def subparser(subparsers): - subparser = subparsers.add_parser('rankinfo') - subparser.add_argument('db', nargs='+') + subparser = subparsers.add_parser("rankinfo") + subparser.add_argument("db", nargs="+") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='output debugging output' + "-d", "--debug", action="store_true", help="output debugging output" ) - subparser.add_argument('--scaled', metavar='FLOAT', type=float) + subparser.add_argument("--scaled", metavar="FLOAT", type=float) subparser.add_argument( - '--minimum-num', type=int, default=0, - help='Minimum number of different lineages a k-mer must be in to be counted' + "--minimum-num", + type=int, + default=0, + help="Minimum number of different lineages a k-mer must be in to be counted", ) def main(args): import sourmash + return sourmash.lca.command_rankinfo.rankinfo_main(args) diff --git
a/src/sourmash/cli/lca/summarize.py b/src/sourmash/cli/lca/summarize.py index a3a8809e73..d9411a7f5b 100644 --- a/src/sourmash/cli/lca/summarize.py +++ b/src/sourmash/cli/lca/summarize.py @@ -2,35 +2,52 @@ def subparser(subparsers): - subparser = subparsers.add_parser('summarize') - subparser.add_argument('--db', nargs='+', action='append', - help='one or more LCA databases to use') - subparser.add_argument('--query', nargs='*', default=[], action='append', - help='one or more signature files to use as queries') - subparser.add_argument('--query-from-file', - help='file containing list of signature files to query') - subparser.add_argument('--threshold', metavar='T', type=int, default=5, - help='minimum number of hashes to require for a match') - subparser.add_argument( - '-o', '--output', metavar='FILE', - help='file to which CSV output will be written' - ) - subparser.add_argument('--scaled', metavar='FLOAT', type=float, - help='scaled value to downsample to') + subparser = subparsers.add_parser("summarize") + subparser.add_argument( + "--db", nargs="+", action="append", help="one or more LCA databases to use" + ) + subparser.add_argument( + "--query", + nargs="*", + default=[], + action="append", + help="one or more signature files to use as queries", + ) + subparser.add_argument( + "--query-from-file", help="file containing list of signature files to query" + ) + subparser.add_argument( + "--threshold", + metavar="T", + type=int, + default=5, + help="minimum number of hashes to require for a match", + ) + subparser.add_argument( + "-o", + "--output", + metavar="FILE", + help="file to which CSV output will be written", + ) + subparser.add_argument( + "--scaled", metavar="FLOAT", type=float, help="scaled value to downsample to" + ) - subparser.add_argument('--ignore-abundance', action='store_true', - help='ignore hash abundances in query signatures do not weight results') + subparser.add_argument( + "--ignore-abundance", + action="store_true", + help="ignore hash abundances in query signatures; do not weight results", + ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='output debugging output' + "-d", "--debug", action="store_true", help="output debugging output" ) def main(args): import sourmash + return sourmash.lca.command_summarize.summarize_main(args) diff --git a/src/sourmash/cli/migrate.py b/src/sourmash/cli/migrate.py index fc5ebe1560..b4140c5afe 100644 --- a/src/sourmash/cli/migrate.py +++ b/src/sourmash/cli/migrate.py @@ -1,10 +1,12 @@ "'sourmash migrate' - migrate an SBT database to the latest version." + def subparser(subparsers): - subparser = subparsers.add_parser('migrate') - subparser.add_argument('sbt_name', help='name to save SBT into') + subparser = subparsers.add_parser("migrate") + subparser.add_argument("sbt_name", help="name to save SBT into") def main(args): import sourmash + return sourmash.commands.migrate(args) diff --git a/src/sourmash/cli/multigather.py b/src/sourmash/cli/multigather.py index cf20a32cd2..15f7f1fc71 100644 --- a/src/sourmash/cli/multigather.py +++ b/src/sourmash/cli/multigather.py @@ -1,6 +1,6 @@ "'sourmash multigather' - gather many signatures against multiple databases." -usage=""" +usage = """ The `multigather` subcommand runs 'gather' for multiple query sequences against the same collection of sequences.
The main use for multigather @@ -40,52 +40,57 @@ def subparser(subparsers): - subparser = subparsers.add_parser('multigather') + subparser = subparsers.add_parser("multigather") subparser.add_argument( - '--query', nargs='*', default=[], action='append', - help='query signature' + "--query", nargs="*", default=[], action="append", help="query signature" ) subparser.add_argument( - '--query-from-file', - help='file containing list of signature files to query' + "--query-from-file", help="file containing list of signature files to query" ) subparser.add_argument( - '--db', nargs='+', action='append', - help='signatures/SBTs to search', + "--db", + nargs="+", + action="append", + help="signatures/SBTs to search", ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) + subparser.add_argument("-d", "--debug", action="store_true") subparser.add_argument( - '-d', '--debug', action='store_true' + "--threshold-bp", + metavar="REAL", + type=float, + default=5e4, + help="threshold (in bp) for reporting results (default=50,000)", ) subparser.add_argument( - '--threshold-bp', metavar='REAL', type=float, default=5e4, - help='threshold (in bp) for reporting results (default=50,000)' + "--ignore-abundance", + action="store_true", + help="do NOT use k-mer abundances if present", ) subparser.add_argument( - '--ignore-abundance', action='store_true', - help='do NOT use k-mer abundances if present' + "--estimate-ani-ci", + action="store_true", + help="also output confidence intervals for ANI estimates", ) subparser.add_argument( - '--estimate-ani-ci', action='store_true', - help='also output confidence intervals for ANI estimates' + "--fail-on-empty-database", + action="store_true", + help="stop at databases that contain no compatible signatures", ) subparser.add_argument( - '--fail-on-empty-database', action='store_true', - help='stop at databases that contain no compatible signatures' - ) - subparser.add_argument( - '--no-fail-on-empty-database', action='store_false', - dest='fail_on_empty_database', - help='continue past databases that contain no compatible signatures' + "--no-fail-on-empty-database", + action="store_false", + dest="fail_on_empty_database", + help="continue past databases that contain no compatible signatures", ) subparser.set_defaults(fail_on_empty_database=True) subparser.add_argument( - '--output-dir', '--outdir', - help='output CSV results to this directory', + "--output-dir", + "--outdir", + help="output CSV results to this directory", ) add_ksize_arg(subparser) @@ -95,4 +100,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.commands.multigather(args) diff --git a/src/sourmash/cli/plot.py b/src/sourmash/cli/plot.py index a548683c39..718a5c8528 100644 --- a/src/sourmash/cli/plot.py +++ b/src/sourmash/cli/plot.py @@ -1,64 +1,80 @@ """plot distance matrix made by 'compare'""" + def subparser(subparsers): - subparser = subparsers.add_parser('plot') - subparser.add_argument( - 'distances', help='output from "sourmash compare"' - ) + subparser = subparsers.add_parser("plot") + subparser.add_argument("distances", help='output from "sourmash compare"') subparser.add_argument( - '--pdf', action='store_true', - help='output PDF; default is PNG' + "--pdf", action="store_true", help="output PDF; default is PNG" ) subparser.add_argument( - '--labels', action='store_true', default=None, - help='show sample labels on dendrogram/matrix' + 
"--labels", + action="store_true", + default=None, + help="show sample labels on dendrogram/matrix", ) subparser.add_argument( - '--no-labels', action='store_false', dest='labels', - help='do not show sample labels' + "--no-labels", + action="store_false", + dest="labels", + help="do not show sample labels", ) subparser.add_argument( - '--labeltext', - help='filename containing list of labels (overrides signature names); implies --labels' + "--labeltext", + help="filename containing list of labels (overrides signature names); implies --labels", ) subparser.add_argument( - '--indices', action='store_true', default=None, - help='show sample indices but not labels; overridden by --labels' + "--indices", + action="store_true", + default=None, + help="show sample indices but not labels; overridden by --labels", ) subparser.add_argument( - '--no-indices', action='store_false', dest='indices', - help='do not show sample indices' + "--no-indices", + action="store_false", + dest="indices", + help="do not show sample indices", ) subparser.add_argument( - '--vmin', default=0.0, type=float, - help='lower limit of heatmap scale; default=%(default)f' + "--vmin", + default=0.0, + type=float, + help="lower limit of heatmap scale; default=%(default)f", ) subparser.add_argument( - '--vmax', default=1.0, type=float, - help='upper limit of heatmap scale; default=%(default)f' + "--vmax", + default=1.0, + type=float, + help="upper limit of heatmap scale; default=%(default)f", ) subparser.add_argument( - '--subsample', type=int, metavar='N', - help='randomly downsample to this many samples, max' + "--subsample", + type=int, + metavar="N", + help="randomly downsample to this many samples, max", ) subparser.add_argument( - '--subsample-seed', type=int, default=1, metavar='S', - help='random seed for --subsample; default=1' + "--subsample-seed", + type=int, + default=1, + metavar="S", + help="random seed for --subsample; default=1", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='forcibly plot non-distance matrices' + "-f", "--force", action="store_true", help="forcibly plot non-distance matrices" ) subparser.add_argument( - '--output-dir', metavar='DIR', help='directory for output plots' + "--output-dir", metavar="DIR", help="directory for output plots" ) subparser.add_argument( - '--csv', metavar='F', - help='write clustered matrix and labels out in CSV format (with column' - ' headers) to this file' + "--csv", + metavar="F", + help="write clustered matrix and labels out in CSV format (with column" + " headers) to this file", ) def main(args): import sourmash + return sourmash.commands.plot(args) diff --git a/src/sourmash/cli/prefetch.py b/src/sourmash/cli/prefetch.py index 3727960292..55ee063d0b 100644 --- a/src/sourmash/cli/prefetch.py +++ b/src/sourmash/cli/prefetch.py @@ -1,66 +1,77 @@ """search a signature against dbs, find all overlaps""" -from sourmash.cli.utils import (add_ksize_arg, add_moltype_args, - add_picklist_args, add_scaled_arg, - add_pattern_args) +from sourmash.cli.utils import ( + add_ksize_arg, + add_moltype_args, + add_picklist_args, + add_scaled_arg, + add_pattern_args, +) def subparser(subparsers): - subparser = subparsers.add_parser('prefetch') - subparser.add_argument('query', help='query signature') - subparser.add_argument("databases", + subparser = subparsers.add_parser("prefetch") + subparser.add_argument("query", help="query signature") + subparser.add_argument( + "databases", nargs="*", help="one or more databases to search", ) subparser.add_argument( 
"--db-from-file", default=None, - help="list of paths containing signatures to search" - ) - subparser.add_argument( - "--linear", action='store_true', - help="force linear traversal of indexes to minimize loading time and memory use" + help="list of paths containing signatures to search", ) subparser.add_argument( - '--no-linear', dest="linear", action='store_false', + "--linear", + action="store_true", + help="force linear traversal of indexes to minimize loading time and memory use", ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "--no-linear", + dest="linear", + action="store_false", ) subparser.add_argument( - '-d', '--debug', action='store_true' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) + subparser.add_argument("-d", "--debug", action="store_true") subparser.add_argument( - '-o', '--output', metavar='FILE', - help='output CSV containing matches to this file' + "-o", + "--output", + metavar="FILE", + help="output CSV containing matches to this file", ) subparser.add_argument( - '--save-matches', metavar='FILE', - help='save all matching signatures from the databases to the ' - 'specified file or directory' + "--save-matches", + metavar="FILE", + help="save all matching signatures from the databases to the " + "specified file or directory", ) subparser.add_argument( - '--threshold-bp', metavar='REAL', type=float, default=5e4, - help='reporting threshold (in bp) for estimated overlap with remaining query hashes (default=50kb)' + "--threshold-bp", + metavar="REAL", + type=float, + default=5e4, + help="reporting threshold (in bp) for estimated overlap with remaining query hashes (default=50kb)", ) subparser.add_argument( - '--save-unmatched-hashes', metavar='FILE', - help='output unmatched query hashes as a signature to the ' - 'specified file' + "--save-unmatched-hashes", + metavar="FILE", + help="output unmatched query hashes as a signature to the " "specified file", ) subparser.add_argument( - '--save-matching-hashes', metavar='FILE', - help='output matching query hashes as a signature to the ' - 'specified file' + "--save-matching-hashes", + metavar="FILE", + help="output matching query hashes as a signature to the " "specified file", ) subparser.add_argument( - '--md5', default=None, - help='select the signature with this md5 as query' + "--md5", default=None, help="select the signature with this md5 as query" ) subparser.add_argument( - '--estimate-ani-ci', action='store_true', - help='also output confidence intervals for ANI estimates' + "--estimate-ani-ci", + action="store_true", + help="also output confidence intervals for ANI estimates", ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -71,4 +82,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.commands.prefetch(args) diff --git a/src/sourmash/cli/sbt_combine.py b/src/sourmash/cli/sbt_combine.py index 1b5ce0febf..20c09fe57a 100644 --- a/src/sourmash/cli/sbt_combine.py +++ b/src/sourmash/cli/sbt_combine.py @@ -1,17 +1,14 @@ """combine multiple Sequence Bloom Trees""" + def subparser(subparsers): - subparser = subparsers.add_parser('sbt_combine') - subparser.add_argument('sbt_name', help='name to save SBT into') - subparser.add_argument( - 'sbts', nargs='+', - help='SBTs to combine to form a new SBT' - ) - subparser.add_argument( - '-x', '--bf-size', metavar='S', type=float, default=1e5 - ) + subparser = subparsers.add_parser("sbt_combine") + subparser.add_argument("sbt_name", help="name to save SBT 
into") + subparser.add_argument("sbts", nargs="+", help="SBTs to combine to form a new SBT") + subparser.add_argument("-x", "--bf-size", metavar="S", type=float, default=1e5) def main(args): import sourmash + return sourmash.commands.sbt_combine(args) diff --git a/src/sourmash/cli/scripts/__init__.py b/src/sourmash/cli/scripts/__init__.py index 7062ff6c71..9655f05c98 100644 --- a/src/sourmash/cli/scripts/__init__.py +++ b/src/sourmash/cli/scripts/__init__.py @@ -21,16 +21,20 @@ # by sourmash.plugins.add_cli_scripts. _extension_dict = {} + def __getattr__(name): if name in _extension_dict: return _extension_dict[name] raise AttributeError(name) + def subparser(subparsers): - subparser = subparsers.add_parser('scripts', - usage=argparse.SUPPRESS, - formatter_class=argparse.RawDescriptionHelpFormatter, - aliases=['ext']) + subparser = subparsers.add_parser( + "scripts", + usage=argparse.SUPPRESS, + formatter_class=argparse.RawDescriptionHelpFormatter, + aliases=["ext"], + ) # get individual help strings: descrs = list(sourmash.plugins.get_cli_scripts_descriptions()) @@ -39,10 +43,12 @@ def subparser(subparsers): else: description = "(No script plugins detected!)" - s = subparser.add_subparsers(title="available plugin/extension commands", - dest='subcmd', - metavar='subcmd', - help=argparse.SUPPRESS, - description=description) + s = subparser.add_subparsers( + title="available plugin/extension commands", + dest="subcmd", + metavar="subcmd", + help=argparse.SUPPRESS, + description=description, + ) _extension_dict.update(sourmash.plugins.add_cli_scripts(s)) diff --git a/src/sourmash/cli/search.py b/src/sourmash/cli/search.py index 2c11873963..46bf46723b 100644 --- a/src/sourmash/cli/search.py +++ b/src/sourmash/cli/search.py @@ -1,6 +1,6 @@ """search a signature against other signatures""" -usage=""" +usage = """ The `search` subcommand searches a collection of signatures or SBTs for matches to the query signature. 
It can search for matches with @@ -41,77 +41,95 @@ --- """ -from sourmash.cli.utils import (add_ksize_arg, add_moltype_args, - add_picklist_args, add_scaled_arg, - add_pattern_args) +from sourmash.cli.utils import ( + add_ksize_arg, + add_moltype_args, + add_picklist_args, + add_scaled_arg, + add_pattern_args, +) def subparser(subparsers): - subparser = subparsers.add_parser('search', description=__doc__, usage=usage) + subparser = subparsers.add_parser("search", description=__doc__, usage=usage) + subparser.add_argument("query", help="query signature") subparser.add_argument( - 'query', help='query signature' + "databases", + nargs="+", + help="signatures/SBTs to search", ) subparser.add_argument( - 'databases', nargs='+', - help='signatures/SBTs to search', + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-d", "--debug", action="store_true", help="output debug information" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='output debug information' + "-t", + "--threshold", + metavar="T", + default=0.08, + type=float, + help="minimum threshold for reporting matches; default=0.08", ) subparser.add_argument( - '-t', '--threshold', metavar='T', default=0.08, type=float, - help='minimum threshold for reporting matches; default=0.08' + "--save-matches", + metavar="FILE", + help="output matching signatures to the specified file", ) subparser.add_argument( - '--save-matches', metavar='FILE', - help='output matching signatures to the specified file' + "--best-only", + action="store_true", + help="report only the best match (with greater speed)", ) subparser.add_argument( - '--best-only', action='store_true', - help='report only the best match (with greater speed)' + "-n", + "--num-results", + default=3, + type=int, + metavar="N", + help="number of results to display to user; 0 to report all", ) subparser.add_argument( - '-n', '--num-results', default=3, type=int, metavar='N', - help='number of results to display to user; 0 to report all' + "--containment", + action="store_true", + help="score based on containment rather than similarity", ) subparser.add_argument( - '--containment', action='store_true', - help='score based on containment rather than similarity' + "--max-containment", + action="store_true", + help="score based on max containment rather than similarity", ) subparser.add_argument( - '--max-containment', action='store_true', - help='score based on max containment rather than similarity' + "--estimate-ani-ci", + action="store_true", + help="for containment searches, also output confidence intervals for ANI estimates", ) subparser.add_argument( - '--estimate-ani-ci', action='store_true', - help='for containment searches, also output confidence intervals for ANI estimates' + "--ignore-abundance", + action="store_true", + help="do NOT use k-mer abundances if present; note: has no effect if " + "--containment or --max-containment is specified", ) subparser.add_argument( - '--ignore-abundance', action='store_true', - help='do NOT use k-mer abundances if present; note: has no effect if ' - '--containment or --max-containment is specified' + "-o", + "--output", + metavar="FILE", + help="output CSV containing matches to this file", ) subparser.add_argument( - '-o', '--output', metavar='FILE', - help='output CSV containing matches to this file' + "--md5", default=None, help="select the signature with this md5 as query" ) subparser.add_argument( - 
'--md5', default=None, - help='select the signature with this md5 as query' + "--fail-on-empty-database", + action="store_true", + help="stop at databases that contain no compatible signatures", ) subparser.add_argument( - '--fail-on-empty-database', action='store_true', - help='stop at databases that contain no compatible signatures' - ) - subparser.add_argument( - '--no-fail-on-empty-database', action='store_false', - dest='fail_on_empty_database', - help='continue past databases that contain no compatible signatures' + "--no-fail-on-empty-database", + action="store_false", + dest="fail_on_empty_database", + help="continue past databases that contain no compatible signatures", ) subparser.set_defaults(fail_on_empty_database=True) @@ -124,4 +142,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.commands.search(args) diff --git a/src/sourmash/cli/sig/__init__.py b/src/sourmash/cli/sig/__init__.py index f256a7473d..2ea27abf1d 100644 --- a/src/sourmash/cli/sig/__init__.py +++ b/src/sourmash/cli/sig/__init__.py @@ -33,19 +33,27 @@ def subparser(subparsers): - subparser = subparsers.add_parser('sig', formatter_class=RawDescriptionHelpFormatter, usage=SUPPRESS, aliases=['signature']) - desc = 'Operations\n' + subparser = subparsers.add_parser( + "sig", + formatter_class=RawDescriptionHelpFormatter, + usage=SUPPRESS, + aliases=["signature"], + ) + desc = "Operations\n" clidir = os.path.dirname(__file__) ops = command_list(clidir) for subcmd in ops: docstring = getattr(sys.modules[__name__], subcmd).__doc__ - helpstring = 'sourmash sig {op:s} --help'.format(op=subcmd) - desc += ' {hs:33s} {ds:s}\n'.format(hs=helpstring, ds=docstring) + helpstring = f"sourmash sig {subcmd:s} --help" + desc += f" {helpstring:33s} {docstring:s}\n" s = subparser.add_subparsers( - title='Manipulate signature files', dest='subcmd', metavar='subcmd', help=SUPPRESS, - description=desc + title="Manipulate signature files", + dest="subcmd", + metavar="subcmd", + help=SUPPRESS, + description=desc, ) for subcmd in ops: getattr(sys.modules[__name__], subcmd).subparser(s) subparser._action_groups.reverse() - subparser._optionals.title = 'Options' + subparser._optionals.title = "Options" diff --git a/src/sourmash/cli/sig/cat.py b/src/sourmash/cli/sig/cat.py index ed85932f5f..b84905f254 100644 --- a/src/sourmash/cli/sig/cat.py +++ b/src/sourmash/cli/sig/cat.py @@ -1,6 +1,6 @@ """concatenate signature files""" -usage=""" +usage = """ ### `sourmash signature cat` - concatenate multiple signatures together @@ -15,37 +15,43 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args, add_pattern_args) +from sourmash.cli.utils import ( + add_moltype_args, + add_ksize_arg, + add_picklist_args, + add_pattern_args, +) def subparser(subparsers): # working on this - subparser = subparsers.add_parser('cat', description=__doc__, usage=usage) - subparser.add_argument('signatures', nargs='*') + subparser = subparsers.add_parser("cat", description=__doc__, usage=usage) + subparser.add_argument("signatures", nargs="*") subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='provide debugging 
output' + "-d", "--debug", action="store_true", help="provide debugging output" ) subparser.add_argument( - '-o', '--output', metavar='FILE', default='-', - help='output signature to this file (default stdout)' + "-o", + "--output", + metavar="FILE", + default="-", + help="output signature to this file (default stdout)", ) subparser.add_argument( - '-u', '--unique', action='store_true', - help='keep only distinct signatures, removing duplicates (based on md5sum)' + "-u", + "--unique", + action="store_true", + help="keep only distinct signatures, removing duplicates (based on md5sum)", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -55,4 +61,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.cat(args) diff --git a/src/sourmash/cli/sig/check.py b/src/sourmash/cli/sig/check.py index b9dd353501..a4c940eecb 100644 --- a/src/sourmash/cli/sig/check.py +++ b/src/sourmash/cli/sig/check.py @@ -1,6 +1,6 @@ """check signature collections against a picklist""" -usage=""" +usage = """ sourmash sig check --picklist ... -o miss.csv -m manifest.csv @@ -15,51 +15,57 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args, add_pattern_args) +from sourmash.cli.utils import ( + add_moltype_args, + add_ksize_arg, + add_picklist_args, + add_pattern_args, +) def subparser(subparsers): - subparser = subparsers.add_parser('check', usage=usage) - subparser.add_argument('signatures', nargs='*') + subparser = subparsers.add_parser("check", usage=usage) + subparser.add_argument("signatures", nargs="*") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='provide debugging output' + "-d", "--debug", action="store_true", help="provide debugging output" ) subparser.add_argument( - '-o', '--output-missing', metavar='FILE', - help='output picklist with remaining unmatched entries to this file', + "-o", + "--output-missing", + metavar="FILE", + help="output picklist with remaining unmatched entries to this file", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) subparser.add_argument( - '-m', '--save-manifest-matching', - help='save a manifest of the matching entries to this file.' + "-m", + "--save-manifest-matching", + help="save a manifest of the matching entries to this file.", ) subparser.add_argument( - '--fail-if-missing', action='store_true', - help='exit with an error code (-1) if there are any missing picklist values.' 
+ "--fail-if-missing", + action="store_true", + help="exit with an error code (-1) if there are any missing picklist values.", ) subparser.add_argument( - '--no-require-manifest', - help='do not require a manifest; generate dynamically if needed', - action='store_true' + "--no-require-manifest", + help="do not require a manifest; generate dynamically if needed", + action="store_true", ) subparser.add_argument( - '-F', '--manifest-format', + "-F", + "--manifest-format", help="format of manifest output file; default is 'csv')", - default='csv', - choices=['csv', 'sql'], + default="csv", + choices=["csv", "sql"], ) add_ksize_arg(subparser) @@ -70,4 +76,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.check(args) diff --git a/src/sourmash/cli/sig/collect.py b/src/sourmash/cli/sig/collect.py index 397b0bf34e..1e5d8ded2f 100644 --- a/src/sourmash/cli/sig/collect.py +++ b/src/sourmash/cli/sig/collect.py @@ -1,6 +1,6 @@ """collect manifest information across many files""" -usage=""" +usage = """ sourmash sig collect -o all.sqlmf @@ -13,45 +13,49 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args, add_pattern_args) +from sourmash.cli.utils import ( + add_moltype_args, + add_ksize_arg, + add_picklist_args, + add_pattern_args, +) def subparser(subparsers): - subparser = subparsers.add_parser('collect', usage=usage) - subparser.add_argument('locations', nargs='*', - help='locations of input signatures') - subparser.add_argument('-o', '--output', help='manifest output file', - required=True) + subparser = subparsers.add_parser("collect", usage=usage) + subparser.add_argument("locations", nargs="*", help="locations of input signatures") + subparser.add_argument("-o", "--output", help="manifest output file", required=True) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='provide debugging output' + "-d", "--debug", action="store_true", help="provide debugging output" ) subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) subparser.add_argument( - '--no-require-manifest', - help='do not require a manifest; generate dynamically if needed', - action='store_true' + "--no-require-manifest", + help="do not require a manifest; generate dynamically if needed", + action="store_true", ) subparser.add_argument( - '-F', '--manifest-format', + "-F", + "--manifest-format", help="format of manifest output file; default is 'csv')", - default='sql', - choices=['csv', 'sql'], + default="sql", + choices=["csv", "sql"], ) - subparser.add_argument('--merge-previous', action='store_true', - help='merge new manifests into existing') - subparser.add_argument('--abspath', - help="convert all locations to absolute paths", - action='store_true') + subparser.add_argument( + "--merge-previous", + action="store_true", + help="merge new manifests into existing", + ) + subparser.add_argument( + "--abspath", help="convert all locations to absolute paths", action="store_true" + ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -59,4 +63,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.collect(args) diff --git a/src/sourmash/cli/sig/describe.py 
b/src/sourmash/cli/sig/describe.py index c59ea1fede..a7984e89d3 100644 --- a/src/sourmash/cli/sig/describe.py +++ b/src/sourmash/cli/sig/describe.py @@ -1,6 +1,6 @@ """show details of signature""" -usage=""" +usage = """ ### `sourmash signature describe` - display detailed information about signatures @@ -22,32 +22,32 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args, add_pattern_args) +from sourmash.cli.utils import ( + add_moltype_args, + add_ksize_arg, + add_picklist_args, + add_pattern_args, +) def subparser(subparsers): - subparser = subparsers.add_parser('describe', description=__doc__, usage=usage) - subparser.add_argument('signatures', nargs='*') + subparser = subparsers.add_parser("describe", description=__doc__, usage=usage) + subparser.add_argument("signatures", nargs="*") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='provide debugging output' + "-d", "--debug", action="store_true", help="provide debugging output" ) subparser.add_argument( - '--csv', metavar='FILE', - help='output information to a CSV file' + "--csv", metavar="FILE", help="output information to a CSV file" ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -57,4 +57,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.describe(args) diff --git a/src/sourmash/cli/sig/downsample.py b/src/sourmash/cli/sig/downsample.py index a06b7d2eb5..7a39221d29 100644 --- a/src/sourmash/cli/sig/downsample.py +++ b/src/sourmash/cli/sig/downsample.py @@ -1,6 +1,6 @@ """downsample one or more signatures""" -usage=""" +usage = """ ### `sourmash signature downsample` - decrease the size of a signature @@ -26,33 +26,36 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args, add_num_arg) +from sourmash.cli.utils import ( + add_moltype_args, + add_ksize_arg, + add_picklist_args, + add_num_arg, +) def subparser(subparsers): - subparser = subparsers.add_parser('downsample', description=__doc__, usage=usage) - subparser.add_argument('signatures', nargs="*") + subparser = subparsers.add_parser("downsample", description=__doc__, usage=usage) + subparser.add_argument("signatures", nargs="*") subparser.add_argument( - '--scaled', type=int, default=0, - help='scaled value to downsample to' + "--scaled", type=int, default=0, help="scaled value to downsample to" ) subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-o', '--output', metavar='FILE', - help='output signature to this file (default stdout)', - default='-', + "-o", + "--output", + metavar="FILE", + help="output signature to 
this file (default stdout)", + default="-", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -62,4 +65,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.downsample(args) diff --git a/src/sourmash/cli/sig/export.py b/src/sourmash/cli/sig/export.py index 0299dba5d1..b6a4142d39 100644 --- a/src/sourmash/cli/sig/export.py +++ b/src/sourmash/cli/sig/export.py @@ -1,6 +1,6 @@ """export a signature, e.g. to mash""" -usage=""" +usage = """ ### `sourmash signature export` - export signatures to mash. @@ -17,19 +17,19 @@ def subparser(subparsers): - subparser = subparsers.add_parser('export', description=__doc__, usage=usage) - subparser.add_argument('filename') + subparser = subparsers.add_parser("export", description=__doc__, usage=usage) + subparser.add_argument("filename") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-o', '--output', metavar='FILE', - help='output signature to this file (default stdout)' + "-o", + "--output", + metavar="FILE", + help="output signature to this file (default stdout)", ) subparser.add_argument( - '--md5', default=None, - help='select the signature with this md5 as query' + "--md5", default=None, help="select the signature with this md5 as query" ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -37,4 +37,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.export(args) diff --git a/src/sourmash/cli/sig/extract.py b/src/sourmash/cli/sig/extract.py index a482526290..d3c483bb5e 100644 --- a/src/sourmash/cli/sig/extract.py +++ b/src/sourmash/cli/sig/extract.py @@ -1,6 +1,6 @@ """extract one or more signatures""" -usage=""" +usage = """ ### `sourmash signature extract` - extract signatures from a collection @@ -37,37 +37,43 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args, add_pattern_args) +from sourmash.cli.utils import ( + add_moltype_args, + add_ksize_arg, + add_picklist_args, + add_pattern_args, +) def subparser(subparsers): - subparser = subparsers.add_parser('extract', description=__doc__, usage=usage) - subparser.add_argument('signatures', nargs='*') + subparser = subparsers.add_parser("extract", description=__doc__, usage=usage) + subparser.add_argument("signatures", nargs="*") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-o', '--output', metavar='FILE', - help='output signature to this file (default stdout)', - default='-', + "-o", + "--output", + metavar="FILE", + help="output signature to this file (default stdout)", + default="-", ) subparser.add_argument( - '--md5', default=None, - help='select signatures whose md5 contains this substring' + "--md5", + default=None, + help="select signatures whose md5 contains this substring", ) subparser.add_argument( - '--name', default=None, - help='select signatures whose name contains this substring' + "--name", + default=None, + help="select signatures whose name contains this substring", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load 
all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -77,4 +83,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.extract(args) diff --git a/src/sourmash/cli/sig/fileinfo.py b/src/sourmash/cli/sig/fileinfo.py index 0b5e71df71..52a894fafb 100644 --- a/src/sourmash/cli/sig/fileinfo.py +++ b/src/sourmash/cli/sig/fileinfo.py @@ -1,6 +1,6 @@ """provide summary information on the given file""" -usage=""" +usage = """ sourmash sig fileinfo @@ -14,33 +14,27 @@ """ - def subparser(subparsers): - subparser = subparsers.add_parser('fileinfo', aliases=['summarize'], - usage=usage) - subparser.add_argument('path') + subparser = subparsers.add_parser("fileinfo", aliases=["summarize"], usage=usage) + subparser.add_argument("path") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='output debug information' + "-d", "--debug", action="store_true", help="output debug information" ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) subparser.add_argument( - '--rebuild-manifest', help='forcibly rebuild the manifest', - action='store_true' + "--rebuild-manifest", help="forcibly rebuild the manifest", action="store_true" ) subparser.add_argument( - '--json-out', help='output information in JSON format only', - action='store_true' + "--json-out", help="output information in JSON format only", action="store_true" ) def main(args): import sourmash + return sourmash.sig.__main__.fileinfo(args) diff --git a/src/sourmash/cli/sig/filter.py b/src/sourmash/cli/sig/filter.py index 4f5f020d83..3cfaa2c7a2 100644 --- a/src/sourmash/cli/sig/filter.py +++ b/src/sourmash/cli/sig/filter.py @@ -1,6 +1,6 @@ """filter k-mers on abundance""" -usage=""" +usage = """ ### `sourmash signature filter` - remove hashes based on abundance @@ -25,32 +25,43 @@ def subparser(subparsers): - subparser = subparsers.add_parser('filter', description=__doc__, usage=usage) - subparser.add_argument('signatures', nargs='+') + subparser = subparsers.add_parser("filter", description=__doc__, usage=usage) + subparser.add_argument("signatures", nargs="+") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-o', '--output', metavar='FILE', - help='output signature to this file (default stdout)', - default='-' + "-o", + "--output", + metavar="FILE", + help="output signature to this file (default stdout)", + default="-", ) subparser.add_argument( - '--md5', type=str, default=None, - help='select signatures whose md5 contains this substring' + "--md5", + type=str, + default=None, + help="select signatures whose md5 contains this substring", ) subparser.add_argument( - '--name', type=str, default=None, - help='select signatures whose name contains this substring' + "--name", + type=str, + default=None, + help="select 
signatures whose name contains this substring", ) subparser.add_argument( - '-m', '--min-abundance', type=int, default=1, - help='keep hashes >= this minimum abundance' + "-m", + "--min-abundance", + type=int, + default=1, + help="keep hashes >= this minimum abundance", ) subparser.add_argument( - '-M', '--max-abundance', type=int, default=None, - help='keep hashes <= this maximum abundance' + "-M", + "--max-abundance", + type=int, + default=None, + help="keep hashes <= this maximum abundance", ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -58,4 +69,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.filter(args) diff --git a/src/sourmash/cli/sig/flatten.py b/src/sourmash/cli/sig/flatten.py index ca87b171c1..fa75f3434c 100644 --- a/src/sourmash/cli/sig/flatten.py +++ b/src/sourmash/cli/sig/flatten.py @@ -1,6 +1,6 @@ """remove abundances""" -usage=""" +usage = """ ### `sourmash signature flatten` - remove abundance information from signatures @@ -18,37 +18,38 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args) +from sourmash.cli.utils import add_moltype_args, add_ksize_arg, add_picklist_args def subparser(subparsers): - subparser = subparsers.add_parser('flatten', description=__doc__, usage=usage) - subparser.add_argument('signatures', nargs='*') + subparser = subparsers.add_parser("flatten", description=__doc__, usage=usage) + subparser.add_argument("signatures", nargs="*") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-o', '--output', metavar='FILE', - help='output signature to this file (default stdout)', - default='-', + "-o", + "--output", + metavar="FILE", + help="output signature to this file (default stdout)", + default="-", ) subparser.add_argument( - '--md5', default=None, - help='select signatures whose md5 contains this substring' + "--md5", + default=None, + help="select signatures whose md5 contains this substring", ) subparser.add_argument( - '--name', default=None, - help='select signatures whose name contains this substring' + "--name", + default=None, + help="select signatures whose name contains this substring", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -57,4 +58,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.flatten(args) diff --git a/src/sourmash/cli/sig/grep.py b/src/sourmash/cli/sig/grep.py index 03d93299da..bf1c5ccf4a 100644 --- a/src/sourmash/cli/sig/grep.py +++ b/src/sourmash/cli/sig/grep.py @@ -1,6 +1,6 @@ """extract one or more signatures by substr/regex match""" -usage=""" +usage = """ sourmash sig grep [... 
] This will search for the provided pattern in the files or databases, @@ -26,63 +26,67 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args) +from sourmash.cli.utils import add_moltype_args, add_ksize_arg, add_picklist_args def subparser(subparsers): - subparser = subparsers.add_parser('grep', usage=usage) - subparser.add_argument('pattern', help='search pattern (string/regex)') - subparser.add_argument('signatures', nargs='*') + subparser = subparsers.add_parser("grep", usage=usage) + subparser.add_argument("pattern", help="search pattern (string/regex)") + subparser.add_argument("signatures", nargs="*") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='output debug information' + "-d", "--debug", action="store_true", help="output debug information" ) subparser.add_argument( - '-o', '--output', metavar='FILE', - help='output matching signatures to this file (default stdout)', - default='-', + "-o", + "--output", + metavar="FILE", + help="output matching signatures to this file (default stdout)", + default="-", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures, independent of filename' + "-f", + "--force", + action="store_true", + help="try to load all files as signatures, independent of filename", ) subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) subparser.add_argument( - '-v', '--invert-match', + "-v", + "--invert-match", help="select non-matching signatures", - action="store_true" + action="store_true", ) subparser.add_argument( - '-i', '--ignore-case', + "-i", + "--ignore-case", help="ignore case distinctions (search lower and upper case both)", - action="store_true" + action="store_true", ) subparser.add_argument( - '--no-require-manifest', - help='do not require a manifest; generate dynamically if needed', - action='store_true' + "--no-require-manifest", + help="do not require a manifest; generate dynamically if needed", + action="store_true", ) subparser.add_argument( - '--csv', - help='save CSV file containing signature data in manifest format' + "--csv", help="save CSV file containing signature data in manifest format" ) subparser.add_argument( - '--silent', '--no-signatures-output', + "--silent", + "--no-signatures-output", help="do not output signatures", - action='store_true', + action="store_true", ) subparser.add_argument( - '-c', '--count', + "-c", + "--count", help="only output a count of discovered signatures; implies --silent", - action='store_true' + action="store_true", ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -91,4 +95,5 @@ def subparser(subparsers): def main(args): import sourmash.sig.grep + return sourmash.sig.grep.main(args) diff --git a/src/sourmash/cli/sig/inflate.py b/src/sourmash/cli/sig/inflate.py index c5a247727a..50b86e6dcf 100644 --- a/src/sourmash/cli/sig/inflate.py +++ b/src/sourmash/cli/sig/inflate.py @@ -1,24 +1,24 @@ """borrow abundances from one signature => one or more other signatures""" -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args) +from sourmash.cli.utils import add_moltype_args, add_ksize_arg, add_picklist_args def subparser(subparsers): - 
subparser = subparsers.add_parser('inflate') - subparser.add_argument('signature_from') - subparser.add_argument('other_sigs', nargs='+') + subparser = subparsers.add_parser("inflate") + subparser.add_argument("signature_from") + subparser.add_argument("other_sigs", nargs="+") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-o', '--output', metavar='FILE', default='-', - help='output signature to this file (default stdout)' + "-o", + "--output", + metavar="FILE", + default="-", + help="output signature to this file (default stdout)", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -27,4 +27,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.inflate(args) diff --git a/src/sourmash/cli/sig/ingest.py b/src/sourmash/cli/sig/ingest.py index 9c7d9e0547..99e84f7a63 100644 --- a/src/sourmash/cli/sig/ingest.py +++ b/src/sourmash/cli/sig/ingest.py @@ -1,6 +1,6 @@ """ingest/import a mash or other signature""" -usage=""" +usage = """ sourmash sig ingest --csv [ ] -o @@ -16,21 +16,25 @@ def subparser(subparsers): # Dirty hack to simultaneously support new and previous interface # If desired, this function can be removed with a major version bump. - for cmd in ('ingest', 'import'): + for cmd in ("ingest", "import"): subparser = subparsers.add_parser(cmd, usage=usage) - subparser.add_argument('--csv', action='store_true', - help='import in Mash CSV format') - subparser.add_argument('filenames', nargs='+') subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "--csv", action="store_true", help="import in Mash CSV format" ) + subparser.add_argument("filenames", nargs="+") subparser.add_argument( - '-o', '--output', metavar='FILE', default='-', - help='output signature to this file (default stdout)' + "-q", "--quiet", action="store_true", help="suppress non-error output" + ) + subparser.add_argument( + "-o", + "--output", + metavar="FILE", + default="-", + help="output signature to this file (default stdout)", ) def main(args): import sourmash + return sourmash.sig.__main__.ingest(args) diff --git a/src/sourmash/cli/sig/intersect.py b/src/sourmash/cli/sig/intersect.py index 4a5ea4db23..521e83f10f 100644 --- a/src/sourmash/cli/sig/intersect.py +++ b/src/sourmash/cli/sig/intersect.py @@ -1,6 +1,6 @@ """intersect two or more signatures""" -usage=""" +usage = """ ### `sourmash signature intersect` - intersect two (or more) signatures @@ -22,32 +22,34 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args) +from sourmash.cli.utils import add_moltype_args, add_ksize_arg, add_picklist_args def subparser(subparsers): - subparser = subparsers.add_parser('intersect', description=__doc__, usage=usage) - subparser.add_argument('signatures', nargs='*') + subparser = subparsers.add_parser("intersect", description=__doc__, usage=usage) + subparser.add_argument("signatures", nargs="*") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-o', '--output', metavar='FILE', default='-', - help='output 
signature to this file (default stdout)' + "-o", + "--output", + metavar="FILE", + default="-", + help="output signature to this file (default stdout)", ) subparser.add_argument( - '-A', '--abundances-from', metavar='FILE', - help='intersect with & take abundances from this signature' + "-A", + "--abundances-from", + metavar="FILE", + help="intersect with & take abundances from this signature", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -56,4 +58,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.intersect(args) diff --git a/src/sourmash/cli/sig/kmers.py b/src/sourmash/cli/sig/kmers.py index 08863f33c9..98d7ee9d8d 100644 --- a/src/sourmash/cli/sig/kmers.py +++ b/src/sourmash/cli/sig/kmers.py @@ -1,6 +1,6 @@ """show k-mers/sequences matching the signature hashes""" -usage=""" +usage = """ ### `sourmash signature kmers` - extract k-mers and/or sequences that match to signatures @@ -48,44 +48,52 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args) +from sourmash.cli.utils import add_moltype_args, add_ksize_arg, add_picklist_args def subparser(subparsers): - subparser = subparsers.add_parser('kmers', description=__doc__, usage=usage) - subparser.add_argument('--signatures', nargs='*', default=[]) + subparser = subparsers.add_parser("kmers", description=__doc__, usage=usage) + subparser.add_argument("--signatures", nargs="*", default=[]) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) add_ksize_arg(subparser) add_moltype_args(subparser) add_picklist_args(subparser) - subparser.add_argument('--sequences', nargs='+', required=True, - help="FASTA/FASTQ/bz2/gz files with sequences") + subparser.add_argument( + "--sequences", + nargs="+", + required=True, + help="FASTA/FASTQ/bz2/gz files with sequences", + ) - subparser.add_argument('--save-kmers', - help="save k-mers and hash values to a CSV file") - subparser.add_argument('--save-sequences', - help="save sequences with matching hashes to a FASTA file") - subparser.add_argument('--translate', action="store_true", - help="translate DNA k-mers into amino acids (for protein, dayhoff, and hp sketches)") subparser.add_argument( - '--check-sequence', action='store_true', - help='complain if input sequence is invalid (NOTE: only checks DNA)' + "--save-kmers", help="save k-mers and hash values to a CSV file" + ) + subparser.add_argument( + "--save-sequences", help="save sequences with matching hashes to a FASTA file" + ) + subparser.add_argument( + "--translate", + action="store_true", + help="translate DNA k-mers into amino acids (for protein, 
dayhoff, and hp sketches)", + ) + subparser.add_argument( + "--check-sequence", + action="store_true", + help="complain if input sequence is invalid (NOTE: only checks DNA)", ) def main(args): import sourmash + return sourmash.sig.__main__.kmers(args) diff --git a/src/sourmash/cli/sig/manifest.py b/src/sourmash/cli/sig/manifest.py index e066dbda67..72f00500c4 100644 --- a/src/sourmash/cli/sig/manifest.py +++ b/src/sourmash/cli/sig/manifest.py @@ -1,6 +1,6 @@ """create a manifest for a collection of signatures""" -usage=""" +usage = """ sourmash sig manifest -o manifest.csv @@ -17,36 +17,40 @@ def subparser(subparsers): - subparser = subparsers.add_parser('manifest', usage=usage) - subparser.add_argument('location') + subparser = subparsers.add_parser("manifest", usage=usage) + subparser.add_argument("location") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='output debug information' + "-d", "--debug", action="store_true", help="output debug information" ) subparser.add_argument( - '-o', '--output', '--csv', metavar='FILE', - help='output information to a CSV file', + "-o", + "--output", + "--csv", + metavar="FILE", + help="output information to a CSV file", required=True, ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) subparser.add_argument( - '--no-rebuild-manifest', help='use existing manifest if available', - action='store_true' + "--no-rebuild-manifest", + help="use existing manifest if available", + action="store_true", ) subparser.add_argument( - '-F', '--manifest-format', + "-F", + "--manifest-format", help="format of manifest output file; default is 'csv')", - default='csv', - choices=['csv', 'sql'], + default="csv", + choices=["csv", "sql"], ) + def main(args): import sourmash + return sourmash.sig.__main__.manifest(args) diff --git a/src/sourmash/cli/sig/merge.py b/src/sourmash/cli/sig/merge.py index 6de8b77d16..026749a5f0 100644 --- a/src/sourmash/cli/sig/merge.py +++ b/src/sourmash/cli/sig/merge.py @@ -1,6 +1,6 @@ """merge one or more signatures""" -usage=""" +usage = """ ### `sourmash signature merge` - merge two or more signatures into one @@ -24,36 +24,32 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args) +from sourmash.cli.utils import add_moltype_args, add_ksize_arg, add_picklist_args def subparser(subparsers): - subparser = subparsers.add_parser('merge', description=__doc__, usage=usage) - subparser.add_argument('signatures', nargs='*') + subparser = subparsers.add_parser("merge", description=__doc__, usage=usage) + subparser.add_argument("signatures", nargs="*") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-o', '--output', metavar='FILE', default='-', - help='output signature to this file (default stdout)' + "-o", + "--output", + metavar="FILE", + default="-", + help="output signature to this file (default stdout)", ) subparser.add_argument( - '--flatten', action='store_true', - help='remove abundances from all signatures' + "--flatten", action="store_true", help="remove abundances from all signatures" ) + 
subparser.add_argument("--name", help="rename merged signature") subparser.add_argument( - '--name', - help='rename merged signature' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' - ) - subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -62,4 +58,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.merge(args) diff --git a/src/sourmash/cli/sig/overlap.py b/src/sourmash/cli/sig/overlap.py index 373336253c..c268e62a85 100644 --- a/src/sourmash/cli/sig/overlap.py +++ b/src/sourmash/cli/sig/overlap.py @@ -1,6 +1,6 @@ """see detailed comparison of signatures""" -usage=""" +usage = """ ### `sourmash signature overlap` - detailed comparison of two signatures' overlap @@ -28,12 +28,11 @@ def subparser(subparsers): - subparser = subparsers.add_parser('overlap', description=__doc__, usage=usage) - subparser.add_argument('signature1') - subparser.add_argument('signature2') + subparser = subparsers.add_parser("overlap", description=__doc__, usage=usage) + subparser.add_argument("signature1") + subparser.add_argument("signature2") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -41,4 +40,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.overlap(args) diff --git a/src/sourmash/cli/sig/rename.py b/src/sourmash/cli/sig/rename.py index 2b360fa8d3..4ed25612fc 100644 --- a/src/sourmash/cli/sig/rename.py +++ b/src/sourmash/cli/sig/rename.py @@ -1,6 +1,6 @@ """rename signature""" -usage=""" +usage = """ ### `sourmash signature rename` - rename a signature @@ -17,34 +17,37 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args, add_pattern_args) +from sourmash.cli.utils import ( + add_moltype_args, + add_ksize_arg, + add_picklist_args, + add_pattern_args, +) def subparser(subparsers): - subparser = subparsers.add_parser('rename', description=__doc__, usage=usage) - subparser.add_argument('signatures', nargs='*') - subparser.add_argument('name') + subparser = subparsers.add_parser("rename", description=__doc__, usage=usage) + subparser.add_argument("signatures", nargs="*") + subparser.add_argument("name") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-d', '--debug', action='store_true', - help='print debugging output' + "-d", "--debug", action="store_true", help="print debugging output" ) subparser.add_argument( - '-o', '--output', metavar='FILE', - help='output renamed signature to this file (default stdout)', - default='-' + "-o", + "--output", + metavar="FILE", + help="output renamed signature to this file (default stdout)", + default="-", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) subparser.add_argument( - '--from-file', - help='a text file containing a 
list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -54,4 +57,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.rename(args) diff --git a/src/sourmash/cli/sig/split.py b/src/sourmash/cli/sig/split.py index e4587b3e0f..bf98fc71fe 100644 --- a/src/sourmash/cli/sig/split.py +++ b/src/sourmash/cli/sig/split.py @@ -1,6 +1,6 @@ """split signature files""" -usage=""" +usage = """ ### `sourmash signature split` - split signatures into individual files @@ -36,32 +36,33 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args) +from sourmash.cli.utils import add_moltype_args, add_ksize_arg, add_picklist_args def subparser(subparsers): - subparser = subparsers.add_parser('split', description=__doc__, usage=usage) - subparser.add_argument('signatures', nargs='*') + subparser = subparsers.add_parser("split", description=__doc__, usage=usage) + subparser.add_argument("signatures", nargs="*") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '--output-dir', '--outdir', - help='output signatures to this directory', + "--output-dir", + "--outdir", + help="output signatures to this directory", ) subparser.add_argument( - '-f', '--force', action='store_true', - help='try to load all files as signatures' + "-f", "--force", action="store_true", help="try to load all files as signatures" ) subparser.add_argument( - '--from-file', - help='a text file containing a list of files to load signatures from' + "--from-file", + help="a text file containing a list of files to load signatures from", ) subparser.add_argument( - '-E', '--extension', type=str, default='.sig', - help="write files with this extension ('.sig' by default)" + "-E", + "--extension", + type=str, + default=".sig", + help="write files with this extension ('.sig' by default)", ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -70,4 +71,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.split(args) diff --git a/src/sourmash/cli/sig/subtract.py b/src/sourmash/cli/sig/subtract.py index 118d91fe41..69a349ace3 100644 --- a/src/sourmash/cli/sig/subtract.py +++ b/src/sourmash/cli/sig/subtract.py @@ -1,6 +1,6 @@ """subtract one or more signatures""" -usage=""" +usage = """ ### `sourmash signature subtract` - subtract other signatures from a signature @@ -22,28 +22,33 @@ """ -from sourmash.cli.utils import (add_moltype_args, add_ksize_arg) +from sourmash.cli.utils import add_moltype_args, add_ksize_arg def subparser(subparsers): - subparser = subparsers.add_parser('subtract', description=__doc__, usage=usage) - subparser.add_argument('signature_from') - subparser.add_argument('subtraction_sigs', nargs='+') + subparser = subparsers.add_parser("subtract", description=__doc__, usage=usage) + subparser.add_argument("signature_from") + subparser.add_argument("subtraction_sigs", nargs="+") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-o', '--output', metavar='FILE', default='-', - help='output signature to this file (default stdout)' + "-o", + "--output", + metavar="FILE", + default="-", + help="output 
signature to this file (default stdout)", ) subparser.add_argument( - '--flatten', action='store_true', - help='remove abundance from signatures before subtracting' + "--flatten", + action="store_true", + help="remove abundance from signatures before subtracting", ) subparser.add_argument( - '-A', '--abundances-from', metavar='FILE', - help='intersect with & take abundances from this signature' + "-A", + "--abundances-from", + metavar="FILE", + help="intersect with & take abundances from this signature", ) add_ksize_arg(subparser) add_moltype_args(subparser) @@ -51,4 +56,5 @@ def subparser(subparsers): def main(args): import sourmash + return sourmash.sig.__main__.subtract(args) diff --git a/src/sourmash/cli/sketch/__init__.py b/src/sourmash/cli/sketch/__init__.py index 22abf26ed1..999ce1d3b9 100644 --- a/src/sourmash/cli/sketch/__init__.py +++ b/src/sourmash/cli/sketch/__init__.py @@ -18,19 +18,24 @@ def subparser(subparsers): - subparser = subparsers.add_parser('sketch', formatter_class=RawDescriptionHelpFormatter, usage=SUPPRESS) - desc = 'Operations\n' + subparser = subparsers.add_parser( + "sketch", formatter_class=RawDescriptionHelpFormatter, usage=SUPPRESS + ) + desc = "Operations\n" clidir = os.path.dirname(__file__) ops = command_list(clidir) for subcmd in ops: docstring = getattr(sys.modules[__name__], subcmd).__doc__ - helpstring = 'sourmash sketch {op:s} --help'.format(op=subcmd) - desc += ' {hs:33s} {ds:s}\n'.format(hs=helpstring, ds=docstring) + helpstring = f"sourmash sketch {subcmd:s} --help" + desc += f" {helpstring:33s} {docstring:s}\n" s = subparser.add_subparsers( - title='Create signatures', dest='subcmd', metavar='subcmd', help=SUPPRESS, - description=desc + title="Create signatures", + dest="subcmd", + metavar="subcmd", + help=SUPPRESS, + description=desc, ) for subcmd in ops: getattr(sys.modules[__name__], subcmd).subparser(s) subparser._action_groups.reverse() - subparser._optionals.title = 'Options' + subparser._optionals.title = "Options" diff --git a/src/sourmash/cli/sketch/dna.py b/src/sourmash/cli/sketch/dna.py index 1d82f9df65..19f6de7509 100644 --- a/src/sourmash/cli/sketch/dna.py +++ b/src/sourmash/cli/sketch/dna.py @@ -1,6 +1,6 @@ """create DNA signatures""" -usage=""" +usage = """ sourmash sketch dna data/*.fna.gz @@ -25,66 +25,79 @@ from sourmash.logging import notify, print_results, error from sourmash import command_sketch -assert command_sketch.DEFAULTS['dna'] == 'k=31,scaled=1000,noabund' + +assert command_sketch.DEFAULTS["dna"] == "k=31,scaled=1000,noabund" def subparser(subparsers): - subparser = subparsers.add_parser('dna', - aliases=['rna', 'nucleotide', 'nt'], - usage=usage) - subparser.add_argument( - '--license', default='CC0', type=str, - help='signature license. Currently only CC0 is supported.' + subparser = subparsers.add_parser( + "dna", aliases=["rna", "nucleotide", "nt"], usage=usage ) subparser.add_argument( - '--check-sequence', action='store_true', - help='complain if input sequence is invalid DNA' + "--license", + default="CC0", + type=str, + help="signature license. 
Currently only CC0 is supported.", ) subparser.add_argument( - '-p', '--param-string', default=[], - help='signature parameters to use.', action='append', + "--check-sequence", + action="store_true", + help="complain if input sequence is invalid DNA", ) - subparser.add_argument( - 'filenames', nargs='*', help='file(s) of sequences' + "-p", + "--param-string", + default=[], + help="signature parameters to use.", + action="append", ) - file_args = subparser.add_argument_group('File handling options') + + subparser.add_argument("filenames", nargs="*", help="file(s) of sequences") + file_args = subparser.add_argument_group("File handling options") file_args.add_argument( - '-f', '--force', action='store_true', - help='recompute signatures even if the file exists' + "-f", + "--force", + action="store_true", + help="recompute signatures even if the file exists", ) subparser.add_argument( - '--from-file', - help='a text file containing a list of sequence files to load' + "--from-file", help="a text file containing a list of sequence files to load" ) file_args.add_argument( - '-o', '--output', - help='output computed signatures to this file' + "-o", "--output", help="output computed signatures to this file" ) file_args.add_argument( - '--merge', '--name', type=str, default='', metavar="FILE", - help='merge all input files into one signature file with the ' - 'specified name' + "--merge", + "--name", + type=str, + default="", + metavar="FILE", + help="merge all input files into one signature file with the " "specified name", ) file_args.add_argument( - '--output-dir', '--outdir', - help='output computed signatures to this directory', + "--output-dir", + "--outdir", + help="output computed signatures to this directory", ) file_args.add_argument( - '--singleton', action='store_true', - help='compute a signature for each sequence record individually' + "--singleton", + action="store_true", + help="compute a signature for each sequence record individually", ) file_args.add_argument( - '--name-from-first', action='store_true', - help='name the signature generated from each file after the first ' - 'record in the file' + "--name-from-first", + action="store_true", + help="name the signature generated from each file after the first " + "record in the file", ) file_args.add_argument( - '--randomize', action='store_true', - help='shuffle the list of input filenames randomly' + "--randomize", + action="store_true", + help="shuffle the list of input filenames randomly", ) def main(args): import sourmash.command_sketch + return sourmash.command_sketch.dna(args) diff --git a/src/sourmash/cli/sketch/fromfile.py b/src/sourmash/cli/sketch/fromfile.py index 08a3e44661..6bd57d26ad 100644 --- a/src/sourmash/cli/sketch/fromfile.py +++ b/src/sourmash/cli/sketch/fromfile.py @@ -1,6 +1,6 @@ """create signatures from a CSV file""" -usage=""" +usage = """ sourmash sketch fromfile --output-signatures -p <...> @@ -28,55 +28,66 @@ def subparser(subparsers): - subparser = subparsers.add_parser('fromfile', - usage=usage) + subparser = subparsers.add_parser("fromfile", usage=usage) subparser.add_argument( - 'csvs', nargs='+', - help="input CSVs providing 'name', 'genome_filename', and 'protein_filename'" + "csvs", + nargs="+", + help="input CSVs providing 'name', 'genome_filename', and 'protein_filename'", ) subparser.add_argument( - '-p', '--param-string', default=[], - help='signature parameters to use.', action='append', + "-p", + "--param-string", + default=[], + help="signature parameters to use.", + action="append", ) 
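[Editor's note] The `-p/--param-string` argument reformatted just above recurs across the sketch subcommands: `action="append"` with `default=[]` lets a user pass several parameter strings in one invocation. A minimal, self-contained sketch of that behavior (the parameter values are illustrative, not quoted from the patch):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-p", "--param-string", default=[], action="append")

# each repeated -p appends another parameter string to args.param_string
args = parser.parse_args(["-p", "k=31,scaled=1000", "-p", "k=21,scaled=1000"])
assert args.param_string == ["k=31,scaled=1000", "k=21,scaled=1000"]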
subparser.add_argument( - '--already-done', nargs='+', default=[], - help='one or more collections of existing signatures to avoid recalculating' + "--already-done", + nargs="+", + default=[], + help="one or more collections of existing signatures to avoid recalculating", ) subparser.add_argument( - '--license', default='CC0', type=str, - help='signature license. Currently only CC0 is supported.' + "--license", + default="CC0", + type=str, + help="signature license. Currently only CC0 is supported.", ) subparser.add_argument( - '--check-sequence', action='store_true', - help='complain if input sequence is invalid (NOTE: only checks DNA)' + "--check-sequence", + action="store_true", + help="complain if input sequence is invalid (NOTE: only checks DNA)", ) - file_args = subparser.add_argument_group('File handling options') + file_args = subparser.add_argument_group("File handling options") file_args.add_argument( - '-o', '--output-signatures', - help='output computed signatures to this file', + "-o", + "--output-signatures", + help="output computed signatures to this file", ) file_args.add_argument( - '--force-output-already-exists', action='store_true', - help='overwrite/append to --output-signatures location' + "--force-output-already-exists", + action="store_true", + help="overwrite/append to --output-signatures location", ) file_args.add_argument( - '--ignore-missing', action='store_true', - help='proceed with building possible signatures, even if some input files are missing' + "--ignore-missing", + action="store_true", + help="proceed with building possible signatures, even if some input files are missing", ) file_args.add_argument( - '--output-csv-info', - help='output information about what signatures need to be generated' + "--output-csv-info", + help="output information about what signatures need to be generated", ) file_args.add_argument( - '--output-manifest-matching', - help='output a manifest file of already-existing signatures' + "--output-manifest-matching", + help="output a manifest file of already-existing signatures", ) file_args.add_argument( - '--report-duplicated', action='store_true', - help='report duplicated names' + "--report-duplicated", action="store_true", help="report duplicated names" ) def main(args): import sourmash.command_sketch + return sourmash.command_sketch.fromfile(args) diff --git a/src/sourmash/cli/sketch/protein.py b/src/sourmash/cli/sketch/protein.py index 24324ea905..3092d35367 100644 --- a/src/sourmash/cli/sketch/protein.py +++ b/src/sourmash/cli/sketch/protein.py @@ -1,6 +1,6 @@ """create protein signatures""" -usage=""" +usage = """ sourmash sketch protein data/*.fna.gz @@ -26,69 +26,82 @@ from sourmash.logging import notify, print_results, error from sourmash import command_sketch -assert command_sketch.DEFAULTS['protein'] == 'k=10,scaled=200,noabund' + +assert command_sketch.DEFAULTS["protein"] == "k=10,scaled=200,noabund" def subparser(subparsers): - subparser = subparsers.add_parser('protein', aliases=['aa', 'prot'], - usage=usage) - subparser.add_argument( - '--license', default='CC0', type=str, - help='signature license. Currently only CC0 is supported.' - ) + subparser = subparsers.add_parser("protein", aliases=["aa", "prot"], usage=usage) subparser.add_argument( - '-p', '--param-string', default=[], - help='signature parameters to use.', action='append', + "--license", + default="CC0", + type=str, + help="signature license. 
Currently only CC0 is supported.", ) - subparser.add_argument( + "-p", + "--param-string", + default=[], + help="signature parameters to use.", + action="append", ) - subparser.add_argument( - 'filenames', nargs='*', help='file(s) of sequences' + + subparser.add_argument("filenames", nargs="*", help="file(s) of sequences") - file_args = subparser.add_argument_group('File handling options') + file_args = subparser.add_argument_group("File handling options") file_args.add_argument( - '-f', '--force', action='store_true', - help='recompute signatures even if the file exists' + "-f", + "--force", + action="store_true", + help="recompute signatures even if the file exists", ) file_args.add_argument( - '-o', '--output', - help='output computed signatures to this file' + "-o", "--output", help="output computed signatures to this file" ) subparser.add_argument( - '--from-file', - help='a text file containing a list of sequence files to load' + "--from-file", help="a text file containing a list of sequence files to load" ) file_args.add_argument( - '--merge', '--name', type=str, default='', metavar="FILE", - help='merge all input files into one signature file with the ' - 'specified name' + "--merge", + "--name", + type=str, + default="", + metavar="FILE", + help="merge all input files into one signature file with the " "specified name", ) file_args.add_argument( - '--output-dir', '--outdir', - help='output computed signatures to this directory', + "--output-dir", + "--outdir", + help="output computed signatures to this directory", ) file_args.add_argument( - '--singleton', action='store_true', - help='compute a signature for each sequence record individually' + "--singleton", + action="store_true", + help="compute a signature for each sequence record individually", ) file_args.add_argument( - '--name-from-first', action='store_true', - help='name the signature generated from each file after the first ' - 'record in the file' + "--name-from-first", + action="store_true", + help="name the signature generated from each file after the first " + "record in the file", ) file_args.add_argument( - '--randomize', action='store_true', - help='shuffle the list of input filenames randomly' + "--randomize", + action="store_true", + help="shuffle the list of input filenames randomly", ) file_args.add_argument( - '--dayhoff', action='store_true', - help='compute sketches using the dayhoff alphabet instead' + "--dayhoff", + action="store_true", + help="compute sketches using the dayhoff alphabet instead", ) file_args.add_argument( - '--hp', action='store_true', - help='compute sketches using the dayhoff alphabet instead' + "--hp", + action="store_true", + help="compute sketches using the hp (hydrophobic-polar) alphabet instead", ) def main(args): import sourmash.command_sketch + return sourmash.command_sketch.protein(args) diff --git a/src/sourmash/cli/sketch/translate.py b/src/sourmash/cli/sketch/translate.py index df48d4818a..f5bccab46f 100644 --- a/src/sourmash/cli/sketch/translate.py +++ b/src/sourmash/cli/sketch/translate.py @@ -1,6 +1,6 @@ """create protein signature from DNA/RNA sequence""" -usage=""" +usage = """ sourmash sketch translate data/*.fna.gz @@ -24,75 +24,90 @@ """ from sourmash import command_sketch -assert command_sketch.DEFAULTS['protein'] == 'k=10,scaled=200,noabund' + +assert command_sketch.DEFAULTS["protein"] == "k=10,scaled=200,noabund" import sourmash from sourmash.logging import notify, print_results, error def subparser(subparsers): - subparser = subparsers.add_parser('translate', usage=usage) - subparser.add_argument( - 
'--license', default='CC0', type=str, - help='signature license. Currently only CC0 is supported.' - ) + subparser = subparsers.add_parser("translate", usage=usage) subparser.add_argument( - '--check-sequence', action='store_true', - help='complain if input sequence is invalid DNA' + "--license", + default="CC0", + type=str, + help="signature license. Currently only CC0 is supported.", ) subparser.add_argument( - '-p', '--param-string', default=[], - help='signature parameters to use.', action='append', + "--check-sequence", + action="store_true", + help="complain if input sequence is invalid DNA", ) - subparser.add_argument( - 'filenames', nargs='*', help='file(s) of sequences' + "-p", + "--param-string", + default=[], + help="signature parameters to use.", + action="append", ) - file_args = subparser.add_argument_group('File handling options') + + subparser.add_argument("filenames", nargs="*", help="file(s) of sequences") + file_args = subparser.add_argument_group("File handling options") file_args.add_argument( - '-f', '--force', action='store_true', - help='recompute signatures even if the file exists' + "-f", + "--force", + action="store_true", + help="recompute signatures even if the file exists", ) file_args.add_argument( - '-o', '--output', - help='output computed signatures to this file' + "-o", "--output", help="output computed signatures to this file" ) subparser.add_argument( - '--from-file', - help='a text file containing a list of sequence files to load' + "--from-file", help="a text file containing a list of sequence files to load" ) file_args.add_argument( - '--merge', '--name', type=str, default='', metavar="FILE", - help='merge all input files into one signature file with the ' - 'specified name' + "--merge", + "--name", + type=str, + default="", + metavar="FILE", + help="merge all input files into one signature file with the " "specified name", ) file_args.add_argument( - '--output-dir', '--outdir', - help='output computed signatures to this directory', + "--output-dir", + "--outdir", + help="output computed signatures to this directory", ) file_args.add_argument( - '--singleton', action='store_true', - help='compute a signature for each sequence record individually' + "--singleton", + action="store_true", + help="compute a signature for each sequence record individually", ) file_args.add_argument( - '--name-from-first', action='store_true', - help='name the signature generated from each file after the first ' - 'record in the file' + "--name-from-first", + action="store_true", + help="name the signature generated from each file after the first " + "record in the file", ) file_args.add_argument( - '--randomize', action='store_true', - help='shuffle the list of input filenames randomly' + "--randomize", + action="store_true", + help="shuffle the list of input filenames randomly", ) file_args.add_argument( - '--dayhoff', action='store_true', - help='compute sketches using the dayhoff alphabet instead' + "--dayhoff", + action="store_true", + help="compute sketches using the dayhoff alphabet instead", ) file_args.add_argument( - '--hp', action='store_true', - help='compute sketches using the dayhoff alphabet instead' + "--hp", + action="store_true", + help="compute sketches using the hp (hydrophobic-polar) alphabet instead", ) def main(args): import sourmash.command_sketch + return sourmash.command_sketch.translate(args) diff --git a/src/sourmash/cli/storage/__init__.py b/src/sourmash/cli/storage/__init__.py index 8ad0b2ada1..42f1a292b2 100644 --- a/src/sourmash/cli/storage/__init__.py +++ 
b/src/sourmash/cli/storage/__init__.py @@ -12,19 +12,24 @@ def subparser(subparsers): - subparser = subparsers.add_parser('storage', formatter_class=RawDescriptionHelpFormatter, usage=SUPPRESS) - desc = 'Operations\n' + subparser = subparsers.add_parser( + "storage", formatter_class=RawDescriptionHelpFormatter, usage=SUPPRESS + ) + desc = "Operations\n" clidir = os.path.dirname(__file__) ops = command_list(clidir) for subcmd in ops: docstring = getattr(sys.modules[__name__], subcmd).__doc__ - helpstring = 'sourmash storage {op:s} --help'.format(op=subcmd) - desc += ' {hs:33s} {ds:s}\n'.format(hs=helpstring, ds=docstring) + helpstring = f"sourmash storage {subcmd:s} --help" + desc += f" {helpstring:33s} {docstring:s}\n" s = subparser.add_subparsers( - title='Storage utilities', dest='subcmd', metavar='subcmd', help=SUPPRESS, - description=desc + title="Storage utilities", + dest="subcmd", + metavar="subcmd", + help=SUPPRESS, + description=desc, ) for subcmd in ops: getattr(sys.modules[__name__], subcmd).subparser(s) subparser._action_groups.reverse() - subparser._optionals.title = 'Options' + subparser._optionals.title = "Options" diff --git a/src/sourmash/cli/storage/convert.py b/src/sourmash/cli/storage/convert.py index 0aa5c23fa5..7efbc2e7ff 100644 --- a/src/sourmash/cli/storage/convert.py +++ b/src/sourmash/cli/storage/convert.py @@ -1,16 +1,13 @@ "'sourmash storage convert' - convert an SBT to use a different back end." + def subparser(subparsers): - subparser = subparsers.add_parser('convert') - subparser.add_argument( - 'sbt', help='name to save SBT into' - ) - subparser.add_argument( - '-b', '--backend', type=str, - help='Backend to convert to' - ) + subparser = subparsers.add_parser("convert") + subparser.add_argument("sbt", help="name to save SBT into") + subparser.add_argument("-b", "--backend", type=str, help="Backend to convert to") def main(args): import sourmash + return sourmash.sbt.convert_cmd(args.sbt, args.backend) diff --git a/src/sourmash/cli/tax/__init__.py b/src/sourmash/cli/tax/__init__.py index b8bf95f8d8..0b58299f56 100644 --- a/src/sourmash/cli/tax/__init__.py +++ b/src/sourmash/cli/tax/__init__.py @@ -18,19 +18,27 @@ def subparser(subparsers): - subparser = subparsers.add_parser('tax', formatter_class=RawDescriptionHelpFormatter, usage=SUPPRESS, aliases=['taxonomy']) - desc = 'Operations\n' + subparser = subparsers.add_parser( + "tax", + formatter_class=RawDescriptionHelpFormatter, + usage=SUPPRESS, + aliases=["taxonomy"], + ) + desc = "Operations\n" clidir = os.path.dirname(__file__) ops = command_list(clidir) for subcmd in ops: docstring = getattr(sys.modules[__name__], subcmd).__doc__ - helpstring = 'sourmash tax {op:s} --help'.format(op=subcmd) - desc += ' {hs:33s} {ds:s}\n'.format(hs=helpstring, ds=docstring) + helpstring = f"sourmash tax {subcmd:s} --help" + desc += f" {helpstring:33s} {docstring:s}\n" s = subparser.add_subparsers( - title="Integrate taxonomy information based on 'gather' results", dest='subcmd', metavar='subcmd', help=SUPPRESS, - description=desc + title="Integrate taxonomy information based on 'gather' results", + dest="subcmd", + metavar="subcmd", + help=SUPPRESS, + description=desc, ) for subcmd in ops: getattr(sys.modules[__name__], subcmd).subparser(s) subparser._action_groups.reverse() - subparser._optionals.title = 'Options' + subparser._optionals.title = "Options" diff --git a/src/sourmash/cli/tax/annotate.py b/src/sourmash/cli/tax/annotate.py index 501a02fd58..7541440fc2 100644 --- a/src/sourmash/cli/tax/annotate.py +++ 
b/src/sourmash/cli/tax/annotate.py @@ -1,6 +1,6 @@ """annotate gather results with taxonomy information""" -usage=""" +usage = """ sourmash tax annotate --gather-csv [ ... ] --taxonomy-csv [ ... ] @@ -19,53 +19,70 @@ def subparser(subparsers): - subparser = subparsers.add_parser('annotate', - aliases=['annotate'], - usage=usage) + subparser = subparsers.add_parser("annotate", aliases=["annotate"], usage=usage) subparser.add_argument( - '-g', '--gather-csv', nargs='*', default = [], action='extend', - help='CSV output files from sourmash gather' + "-g", + "--gather-csv", + nargs="*", + default=[], + action="extend", + help="CSV output files from sourmash gather", ) subparser.add_argument( - '--from-file', metavar='FILE', default=None, - help='input many gather results as a text file, with one gather CSV per line' + "--from-file", + metavar="FILE", + default=None, + help="input many gather results as a text file, with one gather CSV per line", ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-t', '--taxonomy-csv', '--taxonomy', metavar='FILE', - nargs='*', required=True, action="extend", - help='database lineages CSV' + "-t", + "--taxonomy-csv", + "--taxonomy", + metavar="FILE", + nargs="*", + required=True, + action="extend", + help="database lineages CSV", ) subparser.add_argument( - '-o', '--output-dir', default= "", - help='directory for output files' + "-o", "--output-dir", default="", help="directory for output files" ) subparser.add_argument( - '--keep-full-identifiers', action='store_true', - help='do not split identifiers on whitespace' + "--keep-full-identifiers", + action="store_true", + help="do not split identifiers on whitespace", ) subparser.add_argument( - '--keep-identifier-versions', action='store_true', - help='after splitting identifiers, do not remove accession versions' + "--keep-identifier-versions", + action="store_true", + help="after splitting identifiers, do not remove accession versions", ) subparser.add_argument( - '--fail-on-missing-taxonomy', action='store_true', - help='fail quickly if taxonomy is not available for an identifier', + "--fail-on-missing-taxonomy", + action="store_true", + help="fail quickly if taxonomy is not available for an identifier", ) subparser.add_argument( - '-f', '--force', action = 'store_true', - help='continue past errors in file and taxonomy loading', + "-f", + "--force", + action="store_true", + help="continue past errors in file and taxonomy loading", ) subparser.add_argument( - '--lins', '--lin-taxonomy', action='store_true', default=False, - help='use LIN taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain LIN lineage information.' + "--lins", + "--lin-taxonomy", + action="store_true", + default=False, + help="use LIN taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain LIN lineage information.", ) + def main(args): - import sourmash if not args.gather_csv and not args.from_file: - raise ValueError(f"No gather CSVs found! Please input via '-g' or '--from-file'.") + raise ValueError( + "No gather CSVs found! Please input via '-g' or '--from-file'." 
+ ) return sourmash.tax.__main__.annotate(args) diff --git a/src/sourmash/cli/tax/genome.py b/src/sourmash/cli/tax/genome.py index 3f3ee41578..b9712658a4 100644 --- a/src/sourmash/cli/tax/genome.py +++ b/src/sourmash/cli/tax/genome.py @@ -1,6 +1,6 @@ """classify genomes from gather results""" -usage=""" +usage = """ sourmash tax genome --gather-csv [ ... ] --taxonomy-csv [ ... ] @@ -34,81 +34,114 @@ import argparse import sourmash from sourmash.logging import notify, print_results, error -from sourmash.cli.utils import add_tax_threshold_arg, check_rank, check_tax_outputs, add_rank_arg +from sourmash.cli.utils import ( + add_tax_threshold_arg, + check_rank, + check_tax_outputs, + add_rank_arg, +) + def subparser(subparsers): - subparser = subparsers.add_parser('genome', - aliases=['classify'], - usage=usage) + subparser = subparsers.add_parser("genome", aliases=["classify"], usage=usage) subparser.add_argument( - '-g', '--gather-csv', action='extend', nargs='*', default = [], - help='CSVs output by sourmash gather for this sample' + "-g", + "--gather-csv", + action="extend", + nargs="*", + default=[], + help="CSVs output by sourmash gather for this sample", ) subparser.add_argument( - '--from-file', metavar='FILE', default=None, - help='input many gather results as a text file, with one gather CSV per line' + "--from-file", + metavar="FILE", + default=None, + help="input many gather results as a text file, with one gather CSV per line", ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-t', '--taxonomy-csv', '--taxonomy', metavar='FILE', - nargs='*', required=True, action='extend', - help='database lineages CSV' + "-t", + "--taxonomy-csv", + "--taxonomy", + metavar="FILE", + nargs="*", + required=True, + action="extend", + help="database lineages CSV", ) subparser.add_argument( - '-o', '--output-base', default='-', - help='base filepath for output file(s) (default stdout)' + "-o", + "--output-base", + default="-", + help="base filepath for output file(s) (default stdout)", ) subparser.add_argument( - '--output-dir', default= "", - help='directory for output files' + "--output-dir", default="", help="directory for output files" ) subparser.add_argument( - '--keep-full-identifiers', action='store_true', - help='do not split identifiers on whitespace' + "--keep-full-identifiers", + action="store_true", + help="do not split identifiers on whitespace", ) subparser.add_argument( - '--keep-identifier-versions', action='store_true', - help='after splitting identifiers, do not remove accession versions' + "--keep-identifier-versions", + action="store_true", + help="after splitting identifiers, do not remove accession versions", ) subparser.add_argument( - '--fail-on-missing-taxonomy', action='store_true', - help='fail quickly if taxonomy is not available for an identifier', + "--fail-on-missing-taxonomy", + action="store_true", + help="fail quickly if taxonomy is not available for an identifier", ) subparser.add_argument( - '-F', '--output-format', default=[], nargs='*', action='extend', + "-F", + "--output-format", + default=[], + nargs="*", + action="extend", choices=["csv_summary", "krona", "human", "lineage_csv"], - help='choose output format(s)', + help="choose output format(s)", ) subparser.add_argument( - '-f', '--force', action = 'store_true', - help='continue past survivable errors in loading taxonomy database or gather results', + "-f", + 
"--force", + action="store_true", + help="continue past survivable errors in loading taxonomy database or gather results", ) subparser.add_argument( - '--lins', '--lin-taxonomy', action='store_true', default=False, - help="use LIN taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain 'lin' lineage information." + "--lins", + "--lin-taxonomy", + action="store_true", + default=False, + help="use LIN taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain 'lin' lineage information.", ) subparser.add_argument( - '--lingroup', '--lingroups', metavar='FILE', default=None, - help="CSV containing 'name', 'lin' columns, where 'lin' is the lingroup prefix. Will restrict classification to these groups." + "--lingroup", + "--lingroups", + metavar="FILE", + default=None, + help="CSV containing 'name', 'lin' columns, where 'lin' is the lingroup prefix. Will restrict classification to these groups.", ) add_tax_threshold_arg(subparser, 0.1) add_rank_arg(subparser) def main(args): - import sourmash try: if not args.gather_csv and not args.from_file: - raise ValueError(f"No gather CSVs found! Please input via '-g' or '--from-file'.") + raise ValueError( + "No gather CSVs found! Please input via '-g' or '--from-file'." + ) if args.rank: args.rank = check_rank(args) - args.output_format = check_tax_outputs(args, rank_required = ['krona']) + args.output_format = check_tax_outputs(args, rank_required=["krona"]) except ValueError as exc: error(f"ERROR: {str(exc)}") - import sys; sys.exit(-1) + import sys + + sys.exit(-1) return sourmash.tax.__main__.genome(args) diff --git a/src/sourmash/cli/tax/grep.py b/src/sourmash/cli/tax/grep.py index 9aa5db3b89..13c25783fa 100644 --- a/src/sourmash/cli/tax/grep.py +++ b/src/sourmash/cli/tax/grep.py @@ -1,6 +1,6 @@ """search taxonomies and output picklists.""" -usage=""" +usage = """ sourmash tax grep --taxonomy-csv [ ... 
] @@ -21,55 +21,69 @@ def subparser(subparsers): - subparser = subparsers.add_parser('grep', usage=usage) - subparser.add_argument('pattern') - subparser.add_argument('-r', '--rank', - help="search only this rank", - choices=['superkingdom', - 'phylum', - 'class', - 'order', - 'family', - 'genus', - 'species']) + subparser = subparsers.add_parser("grep", usage=usage) + subparser.add_argument("pattern") subparser.add_argument( - '-v', '--invert-match', - help="select non-matching lineages", - action="store_true" + "-r", + "--rank", + help="search only this rank", + choices=[ + "superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + ], ) subparser.add_argument( - '-i', '--ignore-case', + "-v", "--invert-match", help="select non-matching lineages", action="store_true" + ) + subparser.add_argument( + "-i", + "--ignore-case", help="ignore case distinctions (search lower and upper case both)", - action="store_true" + action="store_true", ) subparser.add_argument( - '--silent', '--no-picklist-output', + "--silent", + "--no-picklist-output", help="do not output picklist", - action='store_true', + action="store_true", ) subparser.add_argument( - '-c', '--count', + "-c", + "--count", help="only output a count of discovered lineages; implies --silent", - action='store_true' + action="store_true", ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-t', '--taxonomy-csv', '--taxonomy', metavar='FILE', - nargs="+", required=True, action="extend", - help='database lineages' + "-t", + "--taxonomy-csv", + "--taxonomy", + metavar="FILE", + nargs="+", + required=True, + action="extend", + help="database lineages", ) subparser.add_argument( - '-o', '--output', default='-', - help='output file (defaults to stdout)', + "-o", + "--output", + default="-", + help="output file (defaults to stdout)", ) subparser.add_argument( - '-f', '--force', action = 'store_true', - help='continue past errors in file and taxonomy loading', + "-f", + "--force", + action="store_true", + help="continue past errors in file and taxonomy loading", ) + def main(args): - import sourmash return sourmash.tax.__main__.grep(args) diff --git a/src/sourmash/cli/tax/metagenome.py b/src/sourmash/cli/tax/metagenome.py index 1e3cd50313..563c6c3d81 100644 --- a/src/sourmash/cli/tax/metagenome.py +++ b/src/sourmash/cli/tax/metagenome.py @@ -1,6 +1,6 @@ """summarize metagenome gather results""" -usage=""" +usage = """ sourmash tax metagenome --gather-csv [ ... ] --taxonomy-csv [ ... 
] @@ -26,77 +26,118 @@ from sourmash.cli.utils import add_rank_arg, check_rank, check_tax_outputs - def subparser(subparsers): - subparser = subparsers.add_parser('metagenome', - usage=usage) + subparser = subparsers.add_parser("metagenome", usage=usage) subparser.add_argument( - '-g', '--gather-csv', action="extend", nargs='*', default = [], - help='CSVs from sourmash gather' + "-g", + "--gather-csv", + action="extend", + nargs="*", + default=[], + help="CSVs from sourmash gather", ) subparser.add_argument( - '--from-file', metavar='FILE', default = None, - help='input many gather results as a text file, with one gather CSV per line' + "--from-file", + metavar="FILE", + default=None, + help="input many gather results as a text file, with one gather CSV per line", ) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-o', '--output-base', default='-', - help='base filepath for output file(s) (default stdout)' + "-o", + "--output-base", + default="-", + help="base filepath for output file(s) (default stdout)", ) subparser.add_argument( - '--output-dir', default= "", - help='directory for output files' + "--output-dir", default="", help="directory for output files" ) subparser.add_argument( - '-t', '--taxonomy-csv', '--taxonomy', metavar='FILE', - action="extend", nargs='+', required=True, - help='database lineages CSV' + "-t", + "--taxonomy-csv", + "--taxonomy", + metavar="FILE", + action="extend", + nargs="+", + required=True, + help="database lineages CSV", ) subparser.add_argument( - '--keep-full-identifiers', action='store_true', - help='do not split identifiers on whitespace' + "--keep-full-identifiers", + action="store_true", + help="do not split identifiers on whitespace", ) subparser.add_argument( - '--keep-identifier-versions', action='store_true', - help='after splitting identifiers, do not remove accession versions' + "--keep-identifier-versions", + action="store_true", + help="after splitting identifiers, do not remove accession versions", ) subparser.add_argument( - '--fail-on-missing-taxonomy', action='store_true', - help='fail quickly if taxonomy is not available for an identifier', + "--fail-on-missing-taxonomy", + action="store_true", + help="fail quickly if taxonomy is not available for an identifier", ) subparser.add_argument( - '-F', '--output-format', default=[], nargs='*', action="extend", - choices=["human", "csv_summary", "krona", "lineage_summary", "kreport", "lingroup", "bioboxes"], - help='choose output format(s)', + "-F", + "--output-format", + default=[], + nargs="*", + action="extend", + choices=[ + "human", + "csv_summary", + "krona", + "lineage_summary", + "kreport", + "lingroup", + "bioboxes", + ], + help="choose output format(s)", ) subparser.add_argument( - '-f', '--force', action = 'store_true', - help='continue past errors in taxonomy database loading', + "-f", + "--force", + action="store_true", + help="continue past errors in taxonomy database loading", ) subparser.add_argument( - '--lins', '--lin-taxonomy', action='store_true', default=False, - help="use LIN taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain 'lin' lineage information." + "--lins", + "--lin-taxonomy", + action="store_true", + default=False, + help="use LIN taxonomy in place of standard taxonomic ranks. 
Note that the taxonomy CSV must contain 'lin' lineage information.", ) subparser.add_argument( - '--lingroup', '--lingroups', metavar='FILE', default=None, - help="CSV containing 'name', 'lin' columns, where 'lin' is the lingroup prefix. Will produce a 'lingroup' report containing taxonomic summarization for each group." + "--lingroup", + "--lingroups", + metavar="FILE", + default=None, + help="CSV containing 'name', 'lin' columns, where 'lin' is the lingroup prefix. Will produce a 'lingroup' report containing taxonomic summarization for each group.", ) add_rank_arg(subparser) + def main(args): - import sourmash try: if not args.gather_csv and not args.from_file: - raise ValueError(f"No gather CSVs found! Please input via '-g' or '--from-file'.") + raise ValueError( + "No gather CSVs found! Please input via '-g' or '--from-file'." + ) if args.rank: args.rank = check_rank(args) - args.output_format = check_tax_outputs(args, rank_required = ['krona', 'lineage_summary'], incompatible_with_lins = ['bioboxes', 'kreport'], use_lingroup_format=True) + args.output_format = check_tax_outputs( + args, + rank_required=["krona", "lineage_summary"], + incompatible_with_lins=["bioboxes", "kreport"], + use_lingroup_format=True, + ) except ValueError as exc: error(f"ERROR: {str(exc)}") - import sys; sys.exit(-1) + import sys + + sys.exit(-1) return sourmash.tax.__main__.metagenome(args) diff --git a/src/sourmash/cli/tax/prepare.py b/src/sourmash/cli/tax/prepare.py index de2e58521b..88e4a9f504 100644 --- a/src/sourmash/cli/tax/prepare.py +++ b/src/sourmash/cli/tax/prepare.py @@ -1,6 +1,6 @@ """combine multiple taxonomy databases into one.""" -usage=""" +usage = """ sourmash tax prepare --taxonomy-csv [ ... ] -o @@ -17,44 +17,55 @@ def subparser(subparsers): - subparser = subparsers.add_parser('prepare', - usage=usage) + subparser = subparsers.add_parser("prepare", usage=usage) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-t', '--taxonomy-csv', '--taxonomy', metavar='FILE', - nargs="+", required=True, action="extend", - help='database lineages' + "-t", + "--taxonomy-csv", + "--taxonomy", + metavar="FILE", + nargs="+", + required=True, + action="extend", + help="database lineages", ) subparser.add_argument( - '-o', '--output', required=True, - help='output file', + "-o", + "--output", + required=True, + help="output file", ) subparser.add_argument( - '-F', '--database-format', + "-F", + "--database-format", help="format of output file; default is 'sql')", - default='sql', - choices=['csv', 'sql'], + default="sql", + choices=["csv", "sql"], ) subparser.add_argument( - '--keep-full-identifiers', action='store_true', - help='do not split identifiers on whitespace' + "--keep-full-identifiers", + action="store_true", + help="do not split identifiers on whitespace", ) subparser.add_argument( - '--keep-identifier-versions', action='store_true', - help='after splitting identifiers, do not remove accession versions' + "--keep-identifier-versions", + action="store_true", + help="after splitting identifiers, do not remove accession versions", ) subparser.add_argument( - '--fail-on-missing-taxonomy', action='store_true', - help='fail quickly if taxonomy is not available for an identifier', + "--fail-on-missing-taxonomy", + action="store_true", + help="fail quickly if taxonomy is not available for an identifier", ) subparser.add_argument( - '-f', '--force', action = 
'store_true', - help='continue past errors in file and taxonomy loading', + "-f", + "--force", + action="store_true", + help="continue past errors in file and taxonomy loading", ) + def main(args): - import sourmash return sourmash.tax.__main__.prepare(args) diff --git a/src/sourmash/cli/tax/summarize.py b/src/sourmash/cli/tax/summarize.py index 06a109e95c..d430677b8f 100644 --- a/src/sourmash/cli/tax/summarize.py +++ b/src/sourmash/cli/tax/summarize.py @@ -1,6 +1,6 @@ """summarize taxonomy/lineage information""" -usage=""" +usage = """ sourmash tax summarize [ ... ] @@ -18,39 +18,46 @@ def subparser(subparsers): - subparser = subparsers.add_parser('summarize', - usage=usage) + subparser = subparsers.add_parser("summarize", usage=usage) subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - 'taxonomy_files', - metavar='FILE', - nargs="+", action="extend", - help='database lineages' + "taxonomy_files", + metavar="FILE", + nargs="+", + action="extend", + help="database lineages", ) subparser.add_argument( - '-o', '--output-lineage-information', - help='output a CSV file containing individual lineage counts', + "-o", + "--output-lineage-information", + help="output a CSV file containing individual lineage counts", ) subparser.add_argument( - '--keep-full-identifiers', action='store_true', - help='do not split identifiers on whitespace' + "--keep-full-identifiers", + action="store_true", + help="do not split identifiers on whitespace", ) subparser.add_argument( - '--keep-identifier-versions', action='store_true', - help='after splitting identifiers, do not remove accession versions' + "--keep-identifier-versions", + action="store_true", + help="after splitting identifiers, do not remove accession versions", ) subparser.add_argument( - '-f', '--force', action = 'store_true', - help='continue past errors in file and taxonomy loading', + "-f", + "--force", + action="store_true", + help="continue past errors in file and taxonomy loading", ) subparser.add_argument( - '--lins', '--lin-taxonomy', action='store_true', default=False, - help='use LIN taxonomy in place of standard taxonomic ranks.' 
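# A minimal standalone sketch of the argparse "extend" behavior the tax
# subcommands above rely on (illustrative only, not sourmash code; "extend"
# needs Python 3.8+): repeated -t/-g flags accumulate into one flat list
# rather than overwriting each other.
import argparse

parser = argparse.ArgumentParser(prog="tax-demo")
parser.add_argument(
    "-t", "--taxonomy-csv", "--taxonomy",
    metavar="FILE", action="extend", nargs="+", default=[],
    help="database lineages CSV",
)

# "-t a.csv b.csv -t c.csv" flattens into a single list:
args = parser.parse_args(["-t", "a.csv", "b.csv", "-t", "c.csv"])
assert args.taxonomy_csv == ["a.csv", "b.csv", "c.csv"]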
+ "--lins", + "--lin-taxonomy", + action="store_true", + default=False, + help="use LIN taxonomy in place of standard taxonomic ranks.", ) + def main(args): - import sourmash return sourmash.tax.__main__.summarize(args) diff --git a/src/sourmash/cli/utils.py b/src/sourmash/cli/utils.py index e0d8975b09..26da5ead5f 100644 --- a/src/sourmash/cli/utils.py +++ b/src/sourmash/cli/utils.py @@ -7,38 +7,66 @@ def add_moltype_args(parser): parser.add_argument( - '--protein', dest='protein', action='store_true', - help='choose a protein signature; by default, a nucleotide signature is used' + "--protein", + dest="protein", + action="store_true", + help="choose a protein signature; by default, a nucleotide signature is used", ) parser.add_argument( - '--no-protein', dest='protein', action='store_false', - help='do not choose a protein signature') + "--no-protein", + dest="protein", + action="store_false", + help="do not choose a protein signature", + ) parser.set_defaults(protein=False) parser.add_argument( - '--dayhoff', dest='dayhoff', action='store_true', - help='choose Dayhoff-encoded amino acid signatures' + "--dayhoff", + dest="dayhoff", + action="store_true", + help="choose Dayhoff-encoded amino acid signatures", ) parser.add_argument( - '--no-dayhoff', dest='dayhoff', action='store_false', - help='do not choose Dayhoff-encoded amino acid signatures') + "--no-dayhoff", + dest="dayhoff", + action="store_false", + help="do not choose Dayhoff-encoded amino acid signatures", + ) parser.set_defaults(dayhoff=False) parser.add_argument( - '--hp', '--hydrophobic-polar', dest='hp', action='store_true', - help='choose hydrophobic-polar-encoded amino acid signatures' + "--hp", + "--hydrophobic-polar", + dest="hp", + action="store_true", + help="choose hydrophobic-polar-encoded amino acid signatures", ) parser.add_argument( - '--no-hp', '--no-hydrophobic-polar', dest='hp', action='store_false', - help='do not choose hydrophobic-polar-encoded amino acid signatures') + "--no-hp", + "--no-hydrophobic-polar", + dest="hp", + action="store_false", + help="do not choose hydrophobic-polar-encoded amino acid signatures", + ) parser.set_defaults(hp=False) parser.add_argument( - '--dna', '--rna', '--nucleotide', dest='dna', default=None, action='store_true', - help='choose a nucleotide signature (default: True)') + "--dna", + "--rna", + "--nucleotide", + dest="dna", + default=None, + action="store_true", + help="choose a nucleotide signature (default: True)", + ) parser.add_argument( - '--no-dna', '--no-rna', '--no-nucleotide', dest='dna', action='store_false', - help='do not choose a nucleotide signature') + "--no-dna", + "--no-rna", + "--no-nucleotide", + dest="dna", + action="store_false", + help="do not choose a nucleotide signature", + ) parser.set_defaults(dna=None) @@ -52,16 +80,21 @@ def add_ksize_arg(parser, *, default=None): if default: message = f"k-mer size to select; default={default}" else: - message = f"k-mer size to select; no default." + message = "k-mer size to select; no default." 
parser.add_argument( - '-k', '--ksize', metavar='K', default=default, type=int, + "-k", + "--ksize", + metavar="K", + default=default, + type=int, help=message, ) -#https://stackoverflow.com/questions/55324449/how-to-specify-a-minimum-or-maximum-float-value-with-argparse#55410582 + +# https://stackoverflow.com/questions/55324449/how-to-specify-a-minimum-or-maximum-float-value-with-argparse#55410582 def range_limited_float_type(arg): - """ Type function for argparse - a float within some predefined bounds """ + """Type function for argparse - a float within some predefined bounds""" min_val = 0 max_val = 1 try: @@ -69,119 +102,168 @@ def range_limited_float_type(arg): except ValueError: raise argparse.ArgumentTypeError("\n\tERROR: Must be a floating point number.") if f < min_val or f > max_val: - raise argparse.ArgumentTypeError(f"\n\tERROR: Argument must be >{str(min_val)} and <{str(max_val)}.") + raise argparse.ArgumentTypeError( + f"\n\tERROR: Argument must be >{str(min_val)} and <{str(max_val)}." + ) return f def add_tax_threshold_arg(parser, containment_default=0.1, ani_default=None): parser.add_argument( - '--containment-threshold', default=containment_default, type=range_limited_float_type, - help=f'minimum containment threshold for classification; default={containment_default}', + "--containment-threshold", + default=containment_default, + type=range_limited_float_type, + help=f"minimum containment threshold for classification; default={containment_default}", ) parser.add_argument( - '--ani-threshold', '--aai-threshold', default=ani_default, type=range_limited_float_type, - help=f'minimum ANI threshold (nucleotide gather) or AAI threshold (protein gather) for classification; default={ani_default}', + "--ani-threshold", + "--aai-threshold", + default=ani_default, + type=range_limited_float_type, + help=f"minimum ANI threshold (nucleotide gather) or AAI threshold (protein gather) for classification; default={ani_default}", ) def add_picklist_args(parser): parser.add_argument( - '--picklist', default=None, - help="select signatures based on a picklist, i.e. 'file.csv:colname:coltype'" + "--picklist", + default=None, + help="select signatures based on a picklist, i.e. 
'file.csv:colname:coltype'", ) parser.add_argument( - '--picklist-require-all', default=False, action='store_true', - help="require that all picklist values be found or else fail" + "--picklist-require-all", + default=False, + action="store_true", + help="require that all picklist values be found or else fail", ) def add_pattern_args(parser): parser.add_argument( - '--include-db-pattern', + "--include-db-pattern", default=None, - help='search only signatures that match this pattern in name, filename, or md5' + help="search only signatures that match this pattern in name, filename, or md5", ) parser.add_argument( - '--exclude-db-pattern', + "--exclude-db-pattern", default=None, - help='search only signatures that do not match this pattern in name, filename, or md5' + help="search only signatures that do not match this pattern in name, filename, or md5", ) def opfilter(path): - return not path.startswith('__') and path not in ['utils'] + return not path.startswith("__") and path not in ["utils"] def command_list(dirpath): - paths = glob(os.path.join(dirpath, '*.py')) + paths = glob(os.path.join(dirpath, "*.py")) filenames = [os.path.basename(path) for path in paths] - basenames = [os.path.splitext(path)[0] for path in filenames if not path.startswith('__')] + basenames = [ + os.path.splitext(path)[0] for path in filenames if not path.startswith("__") + ] basenames = filter(opfilter, basenames) return sorted(basenames) def add_scaled_arg(parser, default=None): parser.add_argument( - '--scaled', metavar='FLOAT', type=check_scaled_bounds, - help='downsample to this scaled; value should be between 100 and 1e6' + "--scaled", + metavar="FLOAT", + type=check_scaled_bounds, + help="downsample to this scaled; value should be between 100 and 1e6", ) def add_num_arg(parser, default=0): parser.add_argument( - '-n', '--num-hashes', '--num', metavar='N', type=check_num_bounds, default=default, - help='num value should be between 50 and 50000' + "-n", + "--num-hashes", + "--num", + metavar="N", + type=check_num_bounds, + default=default, + help="num value should be between 50 and 50000", ) def check_rank(args): - """ Check '--rank'/'--position'/'--lin-position' argument matches selected taxonomy.""" - standard_ranks =['strain', 'species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom'] + """Check '--rank'/'--position'/'--lin-position' argument matches selected taxonomy.""" + standard_ranks = [ + "strain", + "species", + "genus", + "family", + "order", + "class", + "phylum", + "superkingdom", + ] if args.lins: - if args.rank.isdigit(): + if args.rank.isdigit(): return str(args.rank) - raise argparse.ArgumentTypeError(f"Invalid '--rank'/'--position' input: '{args.rank}'. '--lins' is specified. Rank must be an integer corresponding to a LIN position.") + raise argparse.ArgumentTypeError( + f"Invalid '--rank'/'--position' input: '{args.rank}'. '--lins' is specified. Rank must be an integer corresponding to a LIN position." + ) elif args.rank in standard_ranks: return args.rank else: - raise argparse.ArgumentTypeError(f"Invalid '--rank'/'--position' input: '{args.rank}'. Please choose: 'strain', 'species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom'") + raise argparse.ArgumentTypeError( + f"Invalid '--rank'/'--position' input: '{args.rank}'. 
Please choose: 'strain', 'species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom'" + ) def add_rank_arg(parser): parser.add_argument( - '-r', '--rank', - '--position', '--lin-position', + "-r", + "--rank", + "--position", + "--lin-position", help="For non-default output formats. Classify to this rank (tax genome) or summarize taxonomy at this rank and above (tax metagenome). \ Note that the taxonomy CSV must contain lineage information at this rank, and that LIN positions start at 0. \ - Choices: 'strain', 'species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom' or an integer LIN position" + Choices: 'strain', 'species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom' or an integer LIN position", ) -def check_tax_outputs(args, rank_required = ["krona"], incompatible_with_lins = None, use_lingroup_format=False): +def check_tax_outputs( + args, + rank_required=["krona"], + incompatible_with_lins=None, + use_lingroup_format=False, +): "Handle ouput format combinations" # check that rank is passed for formats requiring rank. if not args.rank: if any(x in rank_required for x in args.output_format): - raise ValueError(f"Rank (--rank) is required for {', '.join(rank_required)} output formats.") + raise ValueError( + f"Rank (--rank) is required for {', '.join(rank_required)} output formats." + ) if args.lins: # check for outputs incompatible with lins if incompatible_with_lins: if any(x in args.output_format for x in incompatible_with_lins): - raise ValueError(f"The following outputs are incompatible with '--lins': : {', '.join(incompatible_with_lins)}") + raise ValueError( + f"The following outputs are incompatible with '--lins': : {', '.join(incompatible_with_lins)}" + ) # check that lingroup file exists if needed if args.lingroup: if use_lingroup_format and "lingroup" not in args.output_format: args.output_format.append("lingroup") elif "lingroup" in args.output_format: - raise ValueError(f"Must provide lingroup csv via '--lingroup' in order to output a lingroup report.") + raise ValueError( + "Must provide lingroup csv via '--lingroup' in order to output a lingroup report." + ) elif args.lingroup or "lingroup" in args.output_format: - raise ValueError(f"Must enable LIN taxonomy via '--lins' in order to use lingroups.") + raise ValueError( + "Must enable LIN taxonomy via '--lins' in order to use lingroups." 
+ ) # check that only one output format is specified if writing to stdout if len(args.output_format) > 1: if args.output_base == "-": - raise ValueError(f"Writing to stdout is incompatible with multiple output formats {args.output_format}") + raise ValueError( + f"Writing to stdout is incompatible with multiple output formats {args.output_format}" + ) elif not args.output_format: # change to "human" for 5.0 args.output_format = ["csv_summary"] diff --git a/src/sourmash/cli/watch.py b/src/sourmash/cli/watch.py index 7828d376e2..a82c06d1a3 100644 --- a/src/sourmash/cli/watch.py +++ b/src/sourmash/cli/watch.py @@ -4,33 +4,36 @@ def subparser(subparsers): - subparser = subparsers.add_parser('watch') - subparser.add_argument('sbt_name', help='name of SBT to search') - subparser.add_argument('inp_file', nargs='?', default='/dev/stdin') + subparser = subparsers.add_parser("watch") + subparser.add_argument("sbt_name", help="name of SBT to search") + subparser.add_argument("inp_file", nargs="?", default="/dev/stdin") subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) subparser.add_argument( - '-o', '--output', - help='save signature generated from data here' + "-o", "--output", help="save signature generated from data here" ) subparser.add_argument( - '--threshold', metavar='T', default=0.05, type=float, - help='minimum threshold for matches (default=0.05)' + "--threshold", + metavar="T", + default=0.05, + type=float, + help="minimum threshold for matches (default=0.05)", ) subparser.add_argument( - '--input-is-protein', action='store_true', - help='Consume protein sequences - no translation needed' + "--input-is-protein", + action="store_true", + help="Consume protein sequences - no translation needed", ) add_moltype_args(subparser) subparser.add_argument( - '--name', type=str, default='stdin', - help='name to use for generated signature' + "--name", type=str, default="stdin", help="name to use for generated signature" ) add_ksize_arg(subparser) add_num_arg(subparser, 500) + def main(args): import sourmash + return sourmash.commands.watch(args) diff --git a/src/sourmash/command_compute.py b/src/sourmash/command_compute.py index 2dca0ae936..46c4f455f6 100644 --- a/src/sourmash/command_compute.py +++ b/src/sourmash/command_compute.py @@ -13,7 +13,7 @@ from .utils import RustObject from ._lowlevel import ffi, lib -DEFAULT_COMPUTE_K = '21,31,51' +DEFAULT_COMPUTE_K = "21,31,51" DEFAULT_MMHASH_SEED = 42 DEFAULT_LINE_COUNT = 1500 @@ -33,82 +33,82 @@ def compute(args): """ set_quiet(args.quiet) - if args.license != 'CC0': - error('error: sourmash only supports CC0-licensed signatures. sorry!') + if args.license != "CC0": + error("error: sourmash only supports CC0-licensed signatures. sorry!") sys.exit(-1) if args.input_is_protein and args.dna: - notify('WARNING: input is protein, turning off nucleotide hashing') + notify("WARNING: input is protein, turning off nucleotide hashing") args.dna = False args.protein = True if args.scaled: if args.scaled < 1: - error('ERROR: --scaled value must be >= 1') + error("ERROR: --scaled value must be >= 1") sys.exit(-1) if args.scaled != round(args.scaled, 0): - error('ERROR: --scaled value must be integer value') + error("ERROR: --scaled value must be integer value") sys.exit(-1) if args.scaled >= 1e9: - notify('WARNING: scaled value is nonsensical!? Continuing anyway.') + notify("WARNING: scaled value is nonsensical!? 
Continuing anyway.") if args.num_hashes != 0: - notify('setting num_hashes to 0 because --scaled is set') + notify("setting num_hashes to 0 because --scaled is set") args.num_hashes = 0 - notify('computing signatures for files: {}', ", ".join(args.filenames)) + notify("computing signatures for files: {}", ", ".join(args.filenames)) if args.randomize: - notify('randomizing file list because of --randomize') + notify("randomizing file list because of --randomize") random.shuffle(args.filenames) # get list of k-mer sizes for which to compute sketches ksizes = args.ksizes - notify('Computing signature for ksizes: {}', str(ksizes)) + notify("Computing signature for ksizes: {}", str(ksizes)) num_sigs = 0 if args.dna and args.protein: - notify('Computing both nucleotide and protein signatures.') - num_sigs = 2*len(ksizes) + notify("Computing both nucleotide and protein signatures.") + num_sigs = 2 * len(ksizes) elif args.dna and args.dayhoff: - notify('Computing both nucleotide and Dayhoff-encoded protein ' - 'signatures.') - num_sigs = 2*len(ksizes) + notify("Computing both nucleotide and Dayhoff-encoded protein " "signatures.") + num_sigs = 2 * len(ksizes) elif args.dna and args.hp: - notify('Computing both nucleotide and hp-encoded protein ' - 'signatures.') - num_sigs = 2*len(ksizes) + notify("Computing both nucleotide and hp-encoded protein " "signatures.") + num_sigs = 2 * len(ksizes) elif args.dna: - notify('Computing only nucleotide (and not protein) signatures.') + notify("Computing only nucleotide (and not protein) signatures.") num_sigs = len(ksizes) elif args.protein: - notify('Computing only protein (and not nucleotide) signatures.') + notify("Computing only protein (and not nucleotide) signatures.") num_sigs = len(ksizes) elif args.dayhoff: - notify('Computing only Dayhoff-encoded protein (and not nucleotide) ' - 'signatures.') + notify( + "Computing only Dayhoff-encoded protein (and not nucleotide) " "signatures." + ) num_sigs = len(ksizes) elif args.hp: - notify('Computing only hp-encoded protein (and not nucleotide) ' - 'signatures.') + notify("Computing only hp-encoded protein (and not nucleotide) " "signatures.") num_sigs = len(ksizes) - if (args.protein or args.dayhoff or args.hp): + if args.protein or args.dayhoff or args.hp: notify("") - notify("WARNING: you are using 'compute' to make a protein/dayhoff/hp signature,") + notify( + "WARNING: you are using 'compute' to make a protein/dayhoff/hp signature," + ) notify("WARNING: but the meaning of ksize has changed in 4.0. Please see the") notify("WARNING: migration guide to sourmash v4.0 at http://sourmash.rtfd.io/") notify("") - bad_ksizes = [ str(k) for k in ksizes if k % 3 != 0 ] + bad_ksizes = [str(k) for k in ksizes if k % 3 != 0] if bad_ksizes: - error('protein ksizes must be divisible by 3, sorry!') - error('bad ksizes: {}', ", ".join(bad_ksizes)) + error("protein ksizes must be divisible by 3, sorry!") + error("bad ksizes: {}", ", ".join(bad_ksizes)) sys.exit(-1) - notify('Computing a total of {} signature(s) for each input.', num_sigs) + notify("Computing a total of {} signature(s) for each input.", num_sigs) if num_sigs == 0: - error('...nothing to calculate!? Exiting!') + error("...nothing to calculate!? 
Exiting!") sys.exit(-1) if args.merge and not args.output: @@ -120,32 +120,35 @@ def compute(args): sys.exit(-1) if args.track_abundance: - notify('Tracking abundance of input k-mers.') + notify("Tracking abundance of input k-mers.") signatures_factory = _signatures_for_compute_factory(args) - if args.merge: # single name specified - combine all + if args.merge: # single name specified - combine all _compute_merged(args, signatures_factory) - else: # compute individual signatures + else: # compute individual signatures _compute_individual(args, signatures_factory) class _signatures_for_compute_factory: "Build signatures on demand, based on args input to 'compute'." + def __init__(self, args): self.args = args def __call__(self): args = self.args - params = ComputeParameters(ksizes=args.ksizes, - seed=args.seed, - protein=args.protein, - dayhoff=args.dayhoff, - hp=args.hp, - dna=args.dna, - num_hashes=args.num_hashes, - track_abundance=args.track_abundance, - scaled=args.scaled) + params = ComputeParameters( + ksizes=args.ksizes, + seed=args.seed, + protein=args.protein, + dayhoff=args.dayhoff, + hp=args.hp, + dna=args.dna, + num_hashes=args.num_hashes, + track_abundance=args.track_abundance, + scaled=args.scaled, + ) sig = SourmashSignature.from_params(params) return [sig] @@ -167,14 +170,14 @@ def _compute_individual(args, signatures_factory): for filename in args.filenames: if open_output_each_time: # for each input file, construct output filename - sigfile = os.path.basename(filename) + '.sig' + sigfile = os.path.basename(filename) + ".sig" if args.output_dir: sigfile = os.path.join(args.output_dir, sigfile) # does it already exist? skip if so. if os.path.exists(sigfile) and not args.force: - notify('skipping {} - already done', filename) - continue # go on to next file. + notify("skipping {} - already done", filename) + continue # go on to next file. # nope? ok, let's save to it. assert not save_sigs @@ -204,8 +207,12 @@ def _compute_individual(args, signatures_factory): for n, record in enumerate(screed_iter): sigs = signatures_factory() try: - add_seq(sigs, record.sequence, - args.input_is_protein, args.check_sequence) + add_seq( + sigs, + record.sequence, + args.input_is_protein, + args.check_sequence, + ) except ValueError as exc: error(f"ERROR when reading from '{filename}' - ") error(str(exc)) @@ -214,50 +221,63 @@ def _compute_individual(args, signatures_factory): set_sig_name(sigs, filename, name=record.name) save_sigs_to_location(sigs, save_sigs) - notify('calculated {} signatures for {} sequences in {}', - len(save_sigs), n + 1, filename) + notify( + "calculated {} signatures for {} sequences in {}", + len(save_sigs), + n + 1, + filename, + ) # nope; make a single sig for the whole file else: sigs = signatures_factory() # consume & calculate signatures - notify(f'... reading sequences from {filename}') + notify(f"... 
reading sequences from {filename}") name = None for n, record in enumerate(screed_iter): if n % 10000 == 0: if n: - notify('\r...{} {}', filename, n, end='') + notify("\r...{} {}", filename, n, end="") elif args.name_from_first: name = record.name try: - add_seq(sigs, record.sequence, - args.input_is_protein, args.check_sequence) + add_seq( + sigs, + record.sequence, + args.input_is_protein, + args.check_sequence, + ) except ValueError as exc: error(f"ERROR when reading from '{filename}' - ") error(str(exc)) sys.exit(-1) - notify('...{} {} sequences', filename, n, end='') + notify("...{} {} sequences", filename, n, end="") set_sig_name(sigs, filename, name) save_sigs_to_location(sigs, save_sigs) - notify(f'calculated {len(sigs)} signatures for {n+1} sequences in {filename}') + notify( + f"calculated {len(sigs)} signatures for {n+1} sequences in {filename}" + ) # if not args.output, close output for every input filename. if open_output_each_time: save_sigs.close() - notify(f"saved {len(save_sigs)} signature(s) to '{save_sigs.location}'. Note: signature license is CC0.") + notify( + f"saved {len(save_sigs)} signature(s) to '{save_sigs.location}'. Note: signature license is CC0." + ) save_sigs = None - # if --output-dir specified, all collected signatures => args.output, # and we need to close here. if args.output and save_sigs is not None: save_sigs.close() - notify(f"saved {len(save_sigs)} signature(s) to '{save_sigs.location}'. Note: signature license is CC0.") + notify( + f"saved {len(save_sigs)} signature(s) to '{save_sigs.location}'. Note: signature license is CC0." + ) def _compute_merged(args, signatures_factory): @@ -267,26 +287,30 @@ def _compute_merged(args, signatures_factory): total_seq = 0 for filename in args.filenames: # consume & calculate signatures - notify('... reading sequences from {}', filename) + notify("... reading sequences from {}", filename) n = None with screed.open(filename) as f: for n, record in enumerate(f): if n % 10000 == 0 and n: - notify('\r... {} {}', filename, n, end='') + notify("\r... {} {}", filename, n, end="") - add_seq(sigs, record.sequence, - args.input_is_protein, args.check_sequence) + add_seq( + sigs, record.sequence, args.input_is_protein, args.check_sequence + ) if n is not None: - notify('... {} {} sequences', filename, n + 1) + notify("... {} {} sequences", filename, n + 1) total_seq += n + 1 else: notify(f"no sequences found in '{filename}'?!") if total_seq: set_sig_name(sigs, filename, name=args.merge) - notify('calculated 1 signature for {} sequences taken from {} files', - total_seq, len(args.filenames)) + notify( + "calculated 1 signature for {} sequences taken from {} files", + total_seq, + len(args.filenames), + ) # at end, save! save_siglist(sigs, args.output) @@ -301,8 +325,8 @@ def add_seq(sigs, seq, input_is_protein, check_sequence): def set_sig_name(sigs, filename, name=None): - if filename == '-': # if stdin, set filename to empty. - filename = '' + if filename == "-": # if stdin, set filename to empty. 
+ filename = "" for sig in sigs: if name is not None: sig._name = name @@ -332,17 +356,19 @@ def save_sigs_to_location(siglist, save_sig): class ComputeParameters(RustObject): __dealloc_func__ = lib.computeparams_free - def __init__(self, - *, - ksizes=(21, 31, 51), - seed=42, - protein=False, - dayhoff=False, - hp=False, - dna=True, - num_hashes=500, - track_abundance=False, - scaled=0): + def __init__( + self, + *, + ksizes=(21, 31, 51), + seed=42, + protein=False, + dayhoff=False, + hp=False, + dna=True, + num_hashes=500, + track_abundance=False, + scaled=0, + ): self._objptr = lib.computeparams_new() self.seed = seed @@ -359,31 +385,33 @@ def __init__(self, def from_manifest_row(cls, row): "convert a CollectionManifest row into a ComputeParameters object" is_dna = is_protein = is_dayhoff = is_hp = False - if row['moltype'] == 'DNA': + if row["moltype"] == "DNA": is_dna = True - elif row['moltype'] == 'protein': + elif row["moltype"] == "protein": is_protein = True - elif row['moltype'] == 'hp': + elif row["moltype"] == "hp": is_hp = True - elif row['moltype'] == 'dayhoff': + elif row["moltype"] == "dayhoff": is_dayhoff = True else: assert 0 if is_dna: - ksize = row['ksize'] + ksize = row["ksize"] else: - ksize = row['ksize'] * 3 - - p = cls(ksizes=[ksize], - seed=DEFAULT_MMHASH_SEED, - protein=is_protein, - dayhoff=is_dayhoff, - hp=is_hp, - dna=is_dna, - num_hashes=row['num'], - track_abundance=row['with_abundance'], - scaled=row['scaled']) + ksize = row["ksize"] * 3 + + p = cls( + ksizes=[ksize], + seed=DEFAULT_MMHASH_SEED, + protein=is_protein, + dayhoff=is_dayhoff, + hp=is_hp, + dna=is_dna, + num_hashes=row["num"], + track_abundance=row["with_abundance"], + scaled=row["scaled"], + ) return p @@ -400,7 +428,7 @@ def to_param_str(self): elif self.dayhoff: pi.append("dayhoff") else: - assert 0 # must be one of the previous + assert 0 # must be one of the previous if self.dna: kstr = [f"k={k}" for k in self.ksizes] @@ -431,15 +459,17 @@ def __repr__(self): return f"ComputeParameters(ksizes={self.ksizes}, seed={self.seed}, protein={self.protein}, dayhoff={self.dayhoff}, hp={self.hp}, dna={self.dna}, num_hashes={self.num_hashes}, track_abundance={self.track_abundance}, scaled={self.scaled})" def __eq__(self, other): - return (self.ksizes == other.ksizes and - self.seed == other.seed and - self.protein == other.protein and - self.dayhoff == other.dayhoff and - self.hp == other.hp and - self.dna == other.dna and - self.num_hashes == other.num_hashes and - self.track_abundance == other.track_abundance and - self.scaled == other.scaled) + return ( + self.ksizes == other.ksizes + and self.seed == other.seed + and self.protein == other.protein + and self.dayhoff == other.dayhoff + and self.hp == other.hp + and self.dna == other.dna + and self.num_hashes == other.num_hashes + and self.track_abundance == other.track_abundance + and self.scaled == other.scaled + ) @staticmethod def from_args(args): @@ -509,11 +539,16 @@ def dna(self, v): @property def moltype(self): - if self.dna: moltype = 'DNA' - elif self.protein: moltype = 'protein' - elif self.hp: moltype = 'hp' - elif self.dayhoff: moltype = 'dayhoff' - else: assert 0 + if self.dna: + moltype = "DNA" + elif self.protein: + moltype = "protein" + elif self.hp: + moltype = "hp" + elif self.dayhoff: + moltype = "dayhoff" + else: + assert 0 return moltype diff --git a/src/sourmash/command_sketch.py b/src/sourmash/command_sketch.py index f79e3a5fc8..508cac7c01 100644 --- a/src/sourmash/command_sketch.py +++ b/src/sourmash/command_sketch.py @@ 
-12,19 +12,24 @@ import sourmash from .signature import SourmashSignature from .logging import notify, error, set_quiet, print_results -from .command_compute import (_compute_individual, _compute_merged, - ComputeParameters, add_seq, set_sig_name, - DEFAULT_MMHASH_SEED) +from .command_compute import ( + _compute_individual, + _compute_merged, + ComputeParameters, + add_seq, + set_sig_name, + DEFAULT_MMHASH_SEED, +) from sourmash import sourmash_args from sourmash.sourmash_args import check_scaled_bounds, check_num_bounds from sourmash.sig.__main__ import _summarize_manifest, _SketchInfo from sourmash.manifest import CollectionManifest DEFAULTS = dict( - dna='k=31,scaled=1000,noabund', - protein='k=10,scaled=200,noabund', - dayhoff='k=16,scaled=200,noabund', - hp='k=42,scaled=200,noabund' + dna="k=31,scaled=1000,noabund", + protein="k=10,scaled=200,noabund", + dayhoff="k=16,scaled=200,noabund", + hp="k=42,scaled=200,noabund", ) @@ -32,21 +37,21 @@ def _parse_params_str(params_str): "Parse a parameter string of the form 'k=ks,num=num,scaled=scaled,abund'." moltype = None params = {} - params['ksize'] = [] - items = params_str.split(',') + params["ksize"] = [] + items = params_str.split(",") for item in items: - if item == 'abund': - params['track_abundance'] = True - elif item == 'noabund': - params['track_abundance'] = False - elif item.startswith('k'): - if len(item) < 3 or item[1] != '=': + if item == "abund": + params["track_abundance"] = True + elif item == "noabund": + params["track_abundance"] = False + elif item.startswith("k"): + if len(item) < 3 or item[1] != "=": raise ValueError("k takes a parameter, e.g. 'k=31'") - params['ksize'].append(int(item[2:])) - elif item.startswith('num'): - if len(item) < 5 or item[3] != '=': + params["ksize"].append(int(item[2:])) + elif item.startswith("num"): + if len(item) < 5 or item[3] != "=": raise ValueError("num takes a parameter, e.g. 'num=500'") - if params.get('scaled'): + if params.get("scaled"): raise ValueError("cannot set both num and scaled in a single minhash") try: num = item[4:] @@ -56,12 +61,12 @@ def _parse_params_str(params_str): num = check_num_bounds(num) - params['num'] = int(item[4:]) - params['scaled'] = 0 - elif item.startswith('scaled'): - if len(item) < 8 or item[6] != '=': + params["num"] = int(item[4:]) + params["scaled"] = 0 + elif item.startswith("scaled"): + if len(item) < 8 or item[6] != "=": raise ValueError("scaled takes a parameter, e.g. 'scaled=1000'") - if params.get('num'): + if params.get("num"): raise ValueError("cannot set both num and scaled in a single minhash") try: scaled = item[7:] @@ -71,13 +76,13 @@ def _parse_params_str(params_str): scaled = check_scaled_bounds(scaled) - params['scaled'] = scaled - params['num'] = 0 - elif item.startswith('seed'): - if len(item) < 6 or item[4] != '=': + params["scaled"] = scaled + params["num"] = 0 + elif item.startswith("seed"): + if len(item) < 6 or item[4] != "=": raise ValueError("seed takes a parameter, e.g. 'seed=42'") - params['seed'] = int(item[5:]) - elif item in ('protein', 'dayhoff', 'hp', 'dna'): + params["seed"] = int(item[5:]) + elif item in ("protein", "dayhoff", "hp", "dna"): moltype = item else: raise ValueError(f"unknown component '{item}' in params string") @@ -87,12 +92,13 @@ def _parse_params_str(params_str): class _signatures_for_sketch_factory: "Build sigs on demand, based on args input to 'sketch'." 
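# A condensed, illustrative re-implementation of the core rules in
# _parse_params_str above (assumes the same "-p" grammar: comma-separated
# items, repeatable k=, and num/scaled mutually exclusive):
def parse_params(params_str):
    params = {"ksize": []}
    for item in params_str.split(","):
        if item == "abund":
            params["track_abundance"] = True
        elif item == "noabund":
            params["track_abundance"] = False
        elif item.startswith("k="):
            params["ksize"].append(int(item[2:]))
        elif item.startswith("num="):
            if params.get("scaled"):
                raise ValueError("cannot set both num and scaled")
            params["num"] = int(item[4:])
            params["scaled"] = 0
        elif item.startswith("scaled="):
            if params.get("num"):
                raise ValueError("cannot set both num and scaled")
            params["scaled"] = int(item[7:])
            params["num"] = 0
        else:
            raise ValueError(f"unknown component {item!r} in params string")
    return params

assert parse_params("k=21,k=31,scaled=1000,noabund") == {
    "ksize": [21, 31], "track_abundance": False, "num": 0, "scaled": 1000,
}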
+ def __init__(self, params_str_list, default_moltype): # first, set up defaults per-moltype defaults = {} for moltype, pstr in DEFAULTS.items(): mt, d = _parse_params_str(pstr) - assert mt is None # defaults cannot have moltype set! + assert mt is None # defaults cannot have moltype set! defaults[moltype] = d self.defaults = defaults @@ -105,19 +111,27 @@ def __init__(self, params_str_list, default_moltype): # provided. for params_str in params_str_list: moltype, params = _parse_params_str(params_str) - if moltype and moltype != 'dna' and default_moltype == 'dna': - raise ValueError(f"Incompatible sketch type ({default_moltype}) and parameter override ({moltype}) in '{params_str}'; maybe use 'sketch translate'?") - elif moltype == 'dna' and default_moltype and default_moltype != 'dna': - raise ValueError(f"Incompatible sketch type ({default_moltype}) and parameter override ({moltype}) in '{params_str}'") + if moltype and moltype != "dna" and default_moltype == "dna": + raise ValueError( + f"Incompatible sketch type ({default_moltype}) and parameter override ({moltype}) in '{params_str}'; maybe use 'sketch translate'?" + ) + elif moltype == "dna" and default_moltype and default_moltype != "dna": + raise ValueError( + f"Incompatible sketch type ({default_moltype}) and parameter override ({moltype}) in '{params_str}'" + ) elif moltype is None: if default_moltype is None: - raise ValueError(f"No default moltype and none specified in param string") + raise ValueError( + "No default moltype and none specified in param string" + ) moltype = default_moltype self.params_list.append((moltype, params)) else: if default_moltype is None: - raise ValueError(f"No default moltype and none specified in param string") + raise ValueError( + "No default moltype and none specified in param string" + ) # no params str? default to a single sig, using default_moltype. self.params_list.append((default_moltype, {})) @@ -125,38 +139,37 @@ def get_compute_params(self, *, split_ksizes=False): for moltype, params_d in self.params_list: # get defaults for this moltype from self.defaults: default_params = self.defaults[moltype] - def_seed = default_params.get('seed', DEFAULT_MMHASH_SEED) - def_num = default_params.get('num', 0) - def_abund = default_params['track_abundance'] - def_scaled = default_params.get('scaled', 0) - def_dna = default_params.get('is_dna', moltype == 'dna') - def_protein = default_params.get('is_protein', - moltype == 'protein') - def_dayhoff = default_params.get('is_dayhoff', - moltype == 'dayhoff') - def_hp = default_params.get('is_hp', moltype == 'hp') + def_seed = default_params.get("seed", DEFAULT_MMHASH_SEED) + def_num = default_params.get("num", 0) + def_abund = default_params["track_abundance"] + def_scaled = default_params.get("scaled", 0) + def_dna = default_params.get("is_dna", moltype == "dna") + def_protein = default_params.get("is_protein", moltype == "protein") + def_dayhoff = default_params.get("is_dayhoff", moltype == "dayhoff") + def_hp = default_params.get("is_hp", moltype == "hp") # handle ksize specially, for now - multiply by three? - def_ksizes = default_params['ksize'] - ksizes = params_d.get('ksize') + def_ksizes = default_params["ksize"] + ksizes = params_d.get("ksize") if not ksizes: ksizes = def_ksizes # 'command sketch' adjusts k-mer sizes by 3 if non-DNA sketch. 
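# The comment above marks the one non-obvious 'sketch' convention: protein,
# dayhoff, and hp k-mer sizes are multiplied by 3 internally (one residue
# spans three nucleotides), while DNA ksizes pass through unchanged; a
# standalone restatement (illustrative only):
def adjust_ksizes(ksizes, is_dna):
    return ksizes if is_dna else [k * 3 for k in ksizes]

assert adjust_ksizes([31], is_dna=True) == [31]
assert adjust_ksizes([10], is_dna=False) == [30]  # 'sketch protein -p k=10'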
if self.mult_ksize_by_3 and not def_dna: - ksizes = [ k*3 for k in ksizes ] - - make_param = lambda ksizes: ComputeParameters( - ksizes=ksizes, - seed=params_d.get('seed', def_seed), - protein=def_protein, - dayhoff=def_dayhoff, - hp=def_hp, - dna=def_dna, - num_hashes=params_d.get('num', def_num), - track_abundance=params_d.get('track_abundance', - def_abund), - scaled=params_d.get('scaled', def_scaled)) + ksizes = [k * 3 for k in ksizes] + + def make_param(ksizes): + return ComputeParameters( + ksizes=ksizes, + seed=params_d.get("seed", def_seed), + protein=def_protein, + dayhoff=def_dayhoff, + hp=def_hp, + dna=def_dna, + num_hashes=params_d.get("num", def_num), + track_abundance=params_d.get("track_abundance", def_abund), + scaled=params_d.get("scaled", def_scaled), + ) if split_ksizes: for ksize in ksizes: @@ -179,6 +192,7 @@ def __call__(self, *, split_ksizes=False): def _add_from_file_to_filenames(args): "Add filenames from --from-file to args.filenames" from .sourmash_args import load_pathlist_from_file + if args.from_file: file_list = load_pathlist_from_file(args.from_file) args.filenames.extend(file_list) @@ -189,11 +203,11 @@ def _execute_sketch(args, signatures_factory): set_quiet(args.quiet) if not args.filenames: - error('error: no input filenames provided! nothing to do - exiting.') + error("error: no input filenames provided! nothing to do - exiting.") sys.exit(-1) - if args.license != 'CC0': - error('error: sourmash only supports CC0-licensed signatures. sorry!') + if args.license != "CC0": + error("error: sourmash only supports CC0-licensed signatures. sorry!") sys.exit(-1) notify(f'computing signatures for files: {", ".join(args.filenames)}') @@ -208,15 +222,15 @@ def _execute_sketch(args, signatures_factory): # get number of output sigs: num_sigs = len(signatures_factory.params_list) - notify(f'Computing a total of {num_sigs} signature(s) for each input.') + notify(f"Computing a total of {num_sigs} signature(s) for each input.") if num_sigs == 0: - error('...nothing to calculate!? Exiting!') + error("...nothing to calculate!? 
Exiting!") sys.exit(-1) - if args.merge: # single name specified - combine all + if args.merge: # single name specified - combine all _compute_merged(args, signatures_factory) - else: # compute individual signatures + else: # compute individual signatures _compute_individual(args, signatures_factory) @@ -229,8 +243,7 @@ def dna(args): args.input_is_protein = False try: - signatures_factory = _signatures_for_sketch_factory(args.param_string, - 'dna') + signatures_factory = _signatures_for_sketch_factory(args.param_string, "dna") except ValueError as e: error(f"Error creating signatures: {str(e)}") sys.exit(-1) @@ -252,15 +265,14 @@ def protein(args): if args.dayhoff and args.hp: raise ValueError("cannot set both --dayhoff and --hp") if args.dayhoff: - moltype = 'dayhoff' + moltype = "dayhoff" elif args.hp: - moltype = 'hp' + moltype = "hp" else: - moltype = 'protein' + moltype = "protein" try: - signatures_factory = _signatures_for_sketch_factory(args.param_string, - moltype) + signatures_factory = _signatures_for_sketch_factory(args.param_string, moltype) except ValueError as e: error(f"Error creating signatures: {str(e)}") sys.exit(-1) @@ -281,15 +293,14 @@ def translate(args): if args.dayhoff and args.hp: raise ValueError("cannot set both --dayhoff and --hp") if args.dayhoff: - moltype = 'dayhoff' + moltype = "dayhoff" elif args.hp: - moltype = 'hp' + moltype = "hp" else: - moltype = 'protein' + moltype = "protein" try: - signatures_factory = _signatures_for_sketch_factory(args.param_string, - moltype) + signatures_factory = _signatures_for_sketch_factory(args.param_string, moltype) except ValueError as e: error(f"Error creating signatures: {str(e)}") sys.exit(-1) @@ -317,47 +328,51 @@ def _compute_sigs(to_build, output, *, check_sequence=False): is_dna = param_objs[0].dna for p in param_objs: - if p.dna: assert is_dna + if p.dna: + assert is_dna sig = SourmashSignature.from_params(p) sigs.append(sig) input_is_protein = not is_dna # read sequence records & sketch - notify(f'... reading sequences from {filename}') + notify(f"... reading sequences from {filename}") for n, record in enumerate(screed_iter): if n % 10000 == 0: if n: - notify('\r...{} {}', filename, n, end='') + notify("\r...{} {}", filename, n, end="") try: - add_seq(sigs, record.sequence, input_is_protein, - check_sequence) + add_seq(sigs, record.sequence, input_is_protein, check_sequence) except ValueError as exc: error(f"ERROR when reading from '{filename}' - ") error(str(exc)) sys.exit(-1) - notify('...{} {} sequences', filename, n, end='') + notify("...{} {} sequences", filename, n, end="") set_sig_name(sigs, filename, name) for sig in sigs: save_sigs.add(sig) - notify(f'calculated {len(sigs)} signatures for {n+1} sequences in {filename}') - + notify( + f"calculated {len(sigs)} signatures for {n+1} sequences in {filename}" + ) save_sigs.close() - notify(f"saved {len(save_sigs)} signature(s) to '{save_sigs.location}'. Note: signature license is CC0.") + notify( + f"saved {len(save_sigs)} signature(s) to '{save_sigs.location}'. Note: signature license is CC0." 
+ ) def _output_csv_info(filename, sigs_to_build): "output information about what signatures to build, in CSV format" output_n = 0 with sourmash_args.FileOutputCSV(filename) as csv_fp: - w = csv.DictWriter(csv_fp, fieldnames=['filename', 'sketchtype', - 'output_index', 'name', - 'param_strs']) + w = csv.DictWriter( + csv_fp, + fieldnames=["filename", "sketchtype", "output_index", "name", "param_strs"], + ) w.writeheader() output_n = 0 @@ -366,18 +381,22 @@ def _output_csv_info(filename, sigs_to_build): # should all be the same! if param_objs[0].dna: - assert all( ( p.dna for p in param_objs ) ) + assert all(p.dna for p in param_objs) sketchtype = "dna" else: - assert not any( ( p.dna for p in param_objs ) ) + assert not any(p.dna for p in param_objs) sketchtype = "protein" for p in param_objs: param_strs.append(p.to_param_str()) - row = dict(filename=filename, sketchtype=sketchtype, - param_strs="-p " + " -p ".join(param_strs), - name=name, output_index=output_n) + row = dict( + filename=filename, + sketchtype=sketchtype, + param_strs="-p " + " -p ".join(param_strs), + name=name, + output_index=output_n, + ) w.writerow(row) @@ -385,15 +404,19 @@ def _output_csv_info(filename, sigs_to_build): def fromfile(args): - if args.license != 'CC0': - error('error: sourmash only supports CC0-licensed signatures. sorry!') + if args.license != "CC0": + error("error: sourmash only supports CC0-licensed signatures. sorry!") sys.exit(-1) if args.output_signatures and os.path.exists(args.output_signatures): if not args.force_output_already_exists: - error(f"** ERROR: output location '{args.output_signatures}' already exists!") - error(f"** Not overwriting/appending.") - error(f"** Use --force-output-already-exists if you want to overwrite/append.") + error( + f"** ERROR: output location '{args.output_signatures}' already exists!" + ) + error("** Not overwriting/appending.") + error( + "** Use --force-output-already-exists if you want to overwrite/append." + ) sys.exit(-1) # now, create the set of desired sketch specs. @@ -429,13 +452,13 @@ def fromfile(args): for csvfile in args.csvs: with sourmash_args.FileInputCSV(csvfile) as r: for row in r: - name = row['name'] + name = row["name"] if not name: n_missing_name += 1 continue - genome = row['genome_filename'] - proteome = row['protein_filename'] + genome = row["genome_filename"] + proteome = row["protein_filename"] total_rows += 1 if name in all_names: @@ -447,8 +470,10 @@ def fromfile(args): fail_exit = False if n_duplicate_name: if args.report_duplicated: - notify("duplicated:\n" + '\n'.join(sorted(duplicate_names))) - error(f"** ERROR: {n_duplicate_name} entries have duplicate 'name' records. Exiting!") + notify("duplicated:\n" + "\n".join(sorted(duplicate_names))) + error( + f"** ERROR: {n_duplicate_name} entries have duplicate 'name' records. Exiting!" 
+ ) fail_exit = True if n_missing_name: @@ -470,7 +495,7 @@ def fromfile(args): # for each manifest row, for row in manifest.rows: - name = row['name'] + name = row["name"] if name: # build a ComputeParameters object for later comparison p = ComputeParameters.from_manifest_row(row) @@ -505,7 +530,7 @@ def fromfile(args): if p not in plist: # nope - figure out genome/proteome needed filename = genome if p.dna else proteome - filetype = 'genome' if p.dna else 'proteome' + filetype = "genome" if p.dna else "proteome" if filename: # add to build list @@ -524,77 +549,91 @@ def fromfile(args): if already_done_manifest: info_d = _summarize_manifest(already_done_manifest) - print_results('---') + print_results("---") print_results("summary of already-done sketches:") - for ski in info_d['sketch_info']: - mh_type = f"num={ski['num']}" if ski['num'] else f"scaled={ski['scaled']}" - mh_abund = ", abund" if ski['abund'] else "" + for ski in info_d["sketch_info"]: + mh_type = f"num={ski['num']}" if ski["num"] else f"scaled={ski['scaled']}" + mh_abund = ", abund" if ski["abund"] else "" sketch_str = f"{ski['count']} sketches with {ski['moltype']}, k={ski['ksize']}, {mh_type}{mh_abund}" print_results(f" {sketch_str: <50} {ski['n_hashes']} total hashes") - print_results('---') + print_results("---") if args.output_manifest_matching: already_done_manifest.write_to_filename(args.output_manifest_matching) - notify(f"output {len(already_done_manifest)} already-done signatures to '{args.output_manifest_matching}' in manifest format.") + notify( + f"output {len(already_done_manifest)} already-done signatures to '{args.output_manifest_matching}' in manifest format." + ) if missing: error("** ERROR: we cannot build some of the requested signatures.") - error(f"** {missing_count} total signatures (for {len(missing)} names) cannot be built.") + error( + f"** {missing_count} total signatures (for {len(missing)} names) cannot be built." + ) if args.ignore_missing: error("** (continuing past this error because --ignore-missing was set)") else: sys.exit(-1) - notify(f"** {total_sigs - skipped_sigs} new signatures to build from {len(to_build)} files;") + notify( + f"** {total_sigs - skipped_sigs} new signatures to build from {len(to_build)} files;" + ) if not to_build: - notify(f"** Nothing to build. Exiting!") + notify("** Nothing to build. 
Exiting!") sys.exit(0) if skipped_sigs: notify(f"** {skipped_sigs} already exist, so skipping those.") else: - notify(f"** we found no pre-existing signatures that match.") + notify("** we found no pre-existing signatures that match.") ## first, print out a summary of to_build: - print_results('---') + print_results("---") print_results("summary of sketches to build:") counter = Counter() - build_info_d = {} for filename, param_objs in to_build.items(): for p in param_objs: - moltype = p.moltype assert len(p.ksizes) == 1 ksize = p.ksizes[0] - if not p.dna: ksize //= 3 - - ski = _SketchInfo(ksize=ksize, moltype=p.moltype, - scaled=p.scaled, num=p.num_hashes, - abund=p.track_abundance) + if not p.dna: + ksize //= 3 + + ski = _SketchInfo( + ksize=ksize, + moltype=p.moltype, + scaled=p.scaled, + num=p.num_hashes, + abund=p.track_abundance, + ) counter[ski] += 1 for ski, count in counter.items(): mh_type = f"num={ski.num}" if ski.num else f"scaled={ski.scaled}" mh_abund = ", abund" if ski.abund else "" - sketch_str = f"{count} sketches with {ski.moltype}, k={ski.ksize}, {mh_type}{mh_abund}" + sketch_str = ( + f"{count} sketches with {ski.moltype}, k={ski.ksize}, {mh_type}{mh_abund}" + ) print_results(f" {sketch_str: <50}") - print_results('---') + print_results("---") ## now, onward ho - do we build anything, or output stuff, or just exit? - if args.output_signatures: # actually compute - _compute_sigs(to_build, args.output_signatures, - check_sequence=args.check_sequence) + if args.output_signatures: # actually compute + _compute_sigs( + to_build, args.output_signatures, check_sequence=args.check_sequence + ) - if args.output_csv_info: # output info necessary to construct + if args.output_csv_info: # output info necessary to construct _output_csv_info(args.output_csv_info, to_build) - notify(f"** {total_sigs} total requested; output {total_sigs - skipped_sigs}, skipped {skipped_sigs}") + notify( + f"** {total_sigs} total requested; output {total_sigs - skipped_sigs}, skipped {skipped_sigs}" + ) diff --git a/src/sourmash/commands.py b/src/sourmash/commands.py index 7de69c5621..e2d1a09a50 100644 --- a/src/sourmash/commands.py +++ b/src/sourmash/commands.py @@ -9,20 +9,24 @@ import io import screed -from .compare import (compare_all_pairs, compare_serial_containment, - compare_serial_max_containment, compare_serial_avg_containment) +from .compare import ( + compare_all_pairs, + compare_serial_containment, + compare_serial_max_containment, + compare_serial_avg_containment, +) from . import MinHash from .sbtmh import load_sbt_index, create_sbt_index from . import signature as sig from . 
import sourmash_args from .logging import notify, error, print_results, set_quiet -from .sourmash_args import (FileOutput, FileOutputCSV, - SaveSignaturesToLocation) +from .sourmash_args import FileOutput, FileOutputCSV, SaveSignaturesToLocation from .search import prefetch_database, PrefetchResult from .index import LazyLinearIndex WATERMARK_SIZE = 10000 + def _get_screen_width(): # default fallback is 80x24 (col, rows) = shutil.get_terminal_size() @@ -52,17 +56,21 @@ def compare(args): moltypes = set() size_may_be_inaccurate = False for filename in inp_files: - notify(f"loading '{filename}'", end='\r') - loaded = sourmash_args.load_file_as_signatures(filename, - ksize=args.ksize, - select_moltype=moltype, - picklist=picklist, - yield_all_files=args.force, - progress=progress, - pattern=pattern_search) + notify(f"loading '{filename}'", end="\r") + loaded = sourmash_args.load_file_as_signatures( + filename, + ksize=args.ksize, + select_moltype=moltype, + picklist=picklist, + yield_all_files=args.force, + progress=progress, + pattern=pattern_search, + ) loaded = list(loaded) if not loaded: - notify(f'\nwarning: no signatures loaded at given ksize/molecule type/picklist from {filename}') + notify( + f"\nwarning: no signatures loaded at given ksize/molecule type/picklist from {filename}" + ) siglist.extend(loaded) # track ksizes/moltypes @@ -75,22 +83,22 @@ def compare(args): break if not siglist: - error('no signatures found! exiting.') + error("no signatures found! exiting.") sys.exit(-1) # check ksizes and type if len(ksizes) > 1: - error('multiple k-mer sizes loaded; please specify one with -k.') + error("multiple k-mer sizes loaded; please specify one with -k.") ksizes = sorted(ksizes) - error('(saw k-mer sizes {})'.format(', '.join(map(str, ksizes)))) + error("(saw k-mer sizes {})".format(", ".join(map(str, ksizes)))) sys.exit(-1) if len(moltypes) > 1: - error('multiple molecule types loaded; please specify --dna, --protein') + error("multiple molecule types loaded; please specify --dna, --protein") sys.exit(-1) - notify(' '*79, end='\r') - notify(f'loaded {format(len(siglist))} signatures total.') + notify(" " * 79, end="\r") + notify(f"loaded {format(len(siglist))} signatures total.") if picklist: sourmash_args.report_picklist(args, picklist) @@ -103,21 +111,27 @@ def compare(args): # complain if it's not all one or the other if is_scaled != is_scaled_2: - error('ERROR: cannot mix scaled signatures with num signatures') + error("ERROR: cannot mix scaled signatures with num signatures") sys.exit(-1) is_containment = False if args.containment or args.max_containment or args.avg_containment: is_containment = True - containment_args = [args.containment, args.max_containment, args.avg_containment] + containment_args = [ + args.containment, + args.max_containment, + args.avg_containment, + ] if sum(containment_args) > 1: notify("ERROR: cannot specify more than one containment argument!") sys.exit(-1) # complain if --containment and not is_scaled if is_containment and not is_scaled: - error('must use scaled signatures with --containment, --max-containment, and --avg-containment') + error( + "must use scaled signatures with --containment, --max-containment, and --avg-containment" + ) sys.exit(-1) # complain if --ani and not is_scaled @@ -126,14 +140,16 @@ def compare(args): return_ani = True if return_ani and not is_scaled: - error('must use scaled signatures with --estimate-ani') + error("must use scaled signatures with --estimate-ani") sys.exit(-1) # notify about implicit 
--ignore-abundance: if is_containment or return_ani: - track_abundances = any(( s.minhash.track_abundance for s in siglist )) + track_abundances = any(s.minhash.track_abundance for s in siglist) if track_abundances: - notify('NOTE: --containment, --max-containment, --avg-containment, and --estimate-ani ignore signature abundances.') + notify( + "NOTE: --containment, --max-containment, --avg-containment, and --estimate-ani ignore signature abundances." + ) # if using scaled sketches or --scaled, downsample to common max scaled. printed_scaled_msg = False @@ -144,7 +160,9 @@ def compare(args): max_scaled = max(max_scaled, args.scaled) if max_scaled > args.scaled: - notify(f"WARNING: --scaled specified {args.scaled}, but max scaled of sketches is {max_scaled}") + notify( + f"WARNING: --scaled specified {args.scaled}, but max scaled of sketches is {max_scaled}" + ) notify(f"WARNING: continuing with scaled value of {max_scaled}.") new_siglist = [] @@ -153,7 +171,9 @@ def compare(args): size_may_be_inaccurate = True if s.minhash.scaled != max_scaled: if not printed_scaled_msg: - notify(f'NOTE: downsampling to scaled value of {format(max_scaled)}') + notify( + f"NOTE: downsampling to scaled value of {format(max_scaled)}" + ) printed_scaled_msg = True with s.update() as s: s.minhash = s.minhash.downsample(scaled=max_scaled) @@ -166,10 +186,10 @@ def compare(args): sys.exit(-1) if len(siglist) == 0: - error('no signatures!') + error("no signatures!") sys.exit(-1) - notify('') + notify("") # build the distance matrix numpy.set_printoptions(precision=3, suppress=True) @@ -184,8 +204,9 @@ def compare(args): elif args.avg_containment: similarity = compare_serial_avg_containment(siglist, return_ani=return_ani) else: - similarity = compare_all_pairs(siglist, args.ignore_abundance, - n_jobs=args.processes, return_ani=return_ani) + similarity = compare_all_pairs( + siglist, args.ignore_abundance, n_jobs=args.processes, return_ani=return_ani + ) # if distance matrix desired, switch to 1-similarity if args.distance_matrix: @@ -196,25 +217,33 @@ def compare(args): if len(siglist) < 30: for i, ss in enumerate(siglist): # for small matrices, pretty-print some output - name_num = '{}-{}'.format(i, str(ss)) + name_num = f"{i}-{str(ss)}" if len(name_num) > 20: - name_num = name_num[:17] + '...' - print_results('{:20s}\t{}'.format(name_num, matrix[i, :, ],)) + name_num = name_num[:17] + "..." + print_results( + "{:20s}\t{}".format( + name_num, + matrix[ + i, + :, + ], + ) + ) if args.distance_matrix: - print_results('max distance in matrix: {:.3f}', numpy.max(matrix)) + print_results("max distance in matrix: {:.3f}", numpy.max(matrix)) else: - print_results('min similarity in matrix: {:.3f}', numpy.min(matrix)) + print_results("min similarity in matrix: {:.3f}", numpy.min(matrix)) # shall we output a matrix to stdout? if args.output: - labeloutname = args.output + '.labels.txt' - notify(f'saving labels to: {labeloutname}') - with open(labeloutname, 'w') as fp: + labeloutname = args.output + ".labels.txt" + notify(f"saving labels to: {labeloutname}") + with open(labeloutname, "w") as fp: fp.write("\n".join(labeltext)) - notify(f'saving comparison matrix to: {args.output}') - with open(args.output, 'wb') as fp: + notify(f"saving comparison matrix to: {args.output}") + with open(args.output, "wb") as fp: numpy.save(fp, matrix) # output CSV? 
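# The downsampling rule compare() applies above, restated as a standalone
# helper (hypothetical name, illustrative only): mixed scaled values are
# reconciled to the largest scaled seen, or to --scaled if that is larger
# still, because a scaled-S MinHash can be thinned to any coarser S' >= S
# but never refined.
def common_scaled(sketch_scaleds, requested=0):
    return max(max(sketch_scaleds), requested)

assert common_scaled([1000, 2000, 5000]) == 5000
assert common_scaled([1000, 2000], requested=10000) == 10000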
@@ -231,15 +260,20 @@ def compare(args): if size_may_be_inaccurate: if args.distance_matrix: - notify("WARNING: size estimation for at least one of these sketches may be inaccurate. ANI distances will be set to 1 for these comparisons.") + notify( + "WARNING: size estimation for at least one of these sketches may be inaccurate. ANI distances will be set to 1 for these comparisons." + ) else: - notify("WARNING: size estimation for at least one of these sketches may be inaccurate. ANI values will be set to 1 for these comparisons.") + notify( + "WARNING: size estimation for at least one of these sketches may be inaccurate. ANI values will be set to 1 for these comparisons." + ) def plot(args): "Produce a clustering matrix and plot." import matplotlib as mpl - mpl.use('Agg') + + mpl.use("Agg") import numpy import pylab import scipy.cluster.hierarchy as sch @@ -248,16 +282,16 @@ def plot(args): # load files D_filename = args.distances - notify(f'loading comparison matrix from {D_filename}...') - with open(D_filename, 'rb') as f: + notify(f"loading comparison matrix from {D_filename}...") + with open(D_filename, "rb") as f: D = numpy.load(f) # not sure how to change this to use f-strings - notify('...got {} x {} matrix.', *D.shape) + notify("...got {} x {} matrix.", *D.shape) # see sourmash#2790 for details :) if args.labeltext or args.labels: display_labels = True - args.labels = True # override => labels always true + args.labels = True # override => labels always true elif args.labels is None and not args.indices: # default to labels args.labels = True @@ -273,14 +307,14 @@ def plot(args): if args.labeltext: labelfilename = args.labeltext else: - labelfilename = D_filename + '.labels.txt' + labelfilename = D_filename + ".labels.txt" - notify(f'loading labels from {labelfilename}') + notify(f"loading labels from {labelfilename}") with open(labelfilename) as f: - labeltext = [ x.strip() for x in f ] - + labeltext = [x.strip() for x in f] + if len(labeltext) != D.shape[0]: - error('{} labels != matrix size, exiting', len(labeltext)) + error("{} labels != matrix size, exiting", len(labeltext)) sys.exit(-1) elif args.indices: # construct integer labels @@ -290,14 +324,14 @@ def plot(args): labeltext = [""] * D.shape[0] if args.pdf: - ext = '.pdf' + ext = ".pdf" else: - ext = '.png' + ext = ".png" # build filenames, decide on PDF/PNG output - dendrogram_out = os.path.basename(D_filename) + '.dendro' + ext - matrix_out = os.path.basename(D_filename) + '.matrix' + ext - hist_out = os.path.basename(D_filename) + '.hist' + ext + dendrogram_out = os.path.basename(D_filename) + ".dendro" + ext + matrix_out = os.path.basename(D_filename) + ".matrix" + ext + hist_out = os.path.basename(D_filename) + ".hist" + ext # output to a different directory? 
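# How plot() names its three output files (sketch with a hypothetical helper;
# same suffix and extension rules as the hunk above, where --pdf flips the
# default .png to .pdf):
import os

def plot_filenames(d_filename, pdf=False, output_dir=None):
    ext = ".pdf" if pdf else ".png"
    base = os.path.basename(d_filename)
    names = [base + suffix + ext for suffix in (".dendro", ".matrix", ".hist")]
    if output_dir:
        names = [os.path.join(output_dir, name) for name in names]
    return names

assert plot_filenames("cmp") == ["cmp.dendro.png", "cmp.matrix.png", "cmp.hist.png"]
assert plot_filenames("d/cmp", pdf=True)[1] == "cmp.matrix.pdf"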
if args.output_dir: @@ -308,13 +342,13 @@ def plot(args): hist_out = os.path.join(args.output_dir, hist_out) # make the histogram - notify(f'saving histogram of matrix values => {hist_out}') - fig = pylab.figure(figsize=(8,5)) + notify(f"saving histogram of matrix values => {hist_out}") + fig = pylab.figure(figsize=(8, 5)) pylab.hist(numpy.array(D.flat), bins=100) fig.savefig(hist_out) ### make the dendrogram: - fig = pylab.figure(figsize=(8,5)) + fig = pylab.figure(figsize=(8, 5)) ax1 = fig.add_axes([0.1, 0.1, 0.7, 0.8]) ax1.set_xticks([]) ax1.set_yticks([]) @@ -325,32 +359,36 @@ def plot(args): sample_idx = list(range(len(labeltext))) numpy.random.shuffle(sample_idx) - sample_idx = sample_idx[:args.subsample] + sample_idx = sample_idx[: args.subsample] np_idx = numpy.array(sample_idx) D = D[numpy.ix_(np_idx, np_idx)] - labeltext = [ labeltext[idx] for idx in sample_idx ] + labeltext = [labeltext[idx] for idx in sample_idx] ### do clustering - Y = sch.linkage(D, method='single') - sch.dendrogram(Y, orientation='right', labels=labeltext, - no_labels=not display_labels) + Y = sch.linkage(D, method="single") + sch.dendrogram( + Y, orientation="right", labels=labeltext, no_labels=not display_labels + ) fig.savefig(dendrogram_out) - notify(f'wrote dendrogram to: {dendrogram_out}') + notify(f"wrote dendrogram to: {dendrogram_out}") ### make the dendrogram+matrix: - (fig, rlabels, rmat) = sourmash_fig.plot_composite_matrix(D, labeltext, - show_labels=display_labels, - vmin=args.vmin, - vmax=args.vmax, - force=args.force) + (fig, rlabels, rmat) = sourmash_fig.plot_composite_matrix( + D, + labeltext, + show_labels=display_labels, + vmin=args.vmin, + vmax=args.vmax, + force=args.force, + ) fig.savefig(matrix_out) - notify(f'wrote numpy distance matrix to: {matrix_out}') + notify(f"wrote numpy distance matrix to: {matrix_out}") if len(labeltext) < 30: # for small matrices, print out sample numbering for FYI. for i, name in enumerate(labeltext): - print_results('{}\t{}', i, name) + print_results("{}\t{}", i, name) # write out re-ordered matrix and labels if args.csv: @@ -361,15 +399,15 @@ def plot(args): for i in range(len(rlabels)): y = [] for j in range(len(rlabels)): - y.append('{}'.format(rmat[i][j])) + y.append(f"{rmat[i][j]}") w.writerow(y) - notify(f'Wrote clustered matrix and labels out to {args.csv}') + notify(f"Wrote clustered matrix and labels out to {args.csv}") def import_csv(args): "Import a CSV file full of signatures/hashes." 
- with open(args.mash_csvfile, newline='') as fp: + with open(args.mash_csvfile, newline="") as fp: reader = csv.reader(fp) siglist = [] for row in reader: @@ -377,29 +415,29 @@ def import_csv(args): hashseed = int(row[1]) # only support a limited import type, for now ;) - assert hashfn == 'murmur64' + assert hashfn == "murmur64" assert hashseed == 42 _, _, ksize, name, hashes = row ksize = int(ksize) hashes = hashes.strip() - hashes = list(map(int, hashes.split(' ' ))) + hashes = list(map(int, hashes.split(" "))) e = MinHash(len(hashes), ksize) e.add_many(hashes) s = sig.SourmashSignature(e, filename=name) siglist.append(s) - notify(f'loaded signature: {name} {s.md5sum()[:8]}') + notify(f"loaded signature: {name} {s.md5sum()[:8]}") - notify(f'saving {len(siglist)} signatures to JSON') + notify(f"saving {len(siglist)} signatures to JSON") with SaveSignaturesToLocation(args.output) as save_sig: save_sig.add_many(siglist) def sbt_combine(args): inp_files = list(args.sbts) - notify(f'combining {len(inp_files)} SBTs') + notify(f"combining {len(inp_files)} SBTs") tree = load_sbt_index(inp_files.pop(0)) @@ -426,11 +464,11 @@ def index(args): tree = create_sbt_index(args.bf_size, n_children=args.n_children) if args.sparseness < 0 or args.sparseness > 1.0: - error('sparseness must be in range [0.0, 1.0].') + error("sparseness must be in range [0.0, 1.0].") if args.scaled: args.scaled = int(args.scaled) - notify(f'downsampling signatures to scaled={args.scaled}') + notify(f"downsampling signatures to scaled={args.scaled}") inp_files = list(args.signatures) if args.from_file: @@ -441,7 +479,7 @@ def index(args): error("ERROR: no files to index!? Supply on command line or use --from-file") sys.exit(-1) - notify(f'loading {len(inp_files)} files into SBT') + notify(f"loading {len(inp_files)} files into SBT") progress = sourmash_args.SignatureLoadingProgress() @@ -451,12 +489,14 @@ def index(args): nums = set() scaleds = set() for f in inp_files: - siglist = sourmash_args.load_file_as_signatures(f, - ksize=args.ksize, - select_moltype=moltype, - yield_all_files=args.force, - picklist=picklist, - progress=progress) + siglist = sourmash_args.load_file_as_signatures( + f, + ksize=args.ksize, + select_moltype=moltype, + yield_all_files=args.force, + picklist=picklist, + progress=progress, + ) # load all matching signatures in this file ss = None @@ -481,26 +521,29 @@ def index(args): # check to make sure we aren't loading incompatible signatures if len(ksizes) > 1 or len(moltypes) > 1: - error('multiple k-mer sizes or molecule types present; fail.') - error('specify --dna/--protein and --ksize as necessary') - error('ksizes: {}; moltypes: {}', - ", ".join(map(str, ksizes)), ", ".join(moltypes)) + error("multiple k-mer sizes or molecule types present; fail.") + error("specify --dna/--protein and --ksize as necessary") + error( + "ksizes: {}; moltypes: {}", + ", ".join(map(str, ksizes)), + ", ".join(moltypes), + ) sys.exit(-1) - if nums == { 0 } and len(scaleds) == 1: - pass # good - elif scaleds == { 0 } and len(nums) == 1: - pass # also good + if nums == {0} and len(scaleds) == 1: + pass # good + elif scaleds == {0} and len(nums) == 1: + pass # also good else: - error('trying to build an SBT with incompatible signatures.') - error('nums = {}; scaleds = {}', repr(nums), repr(scaleds)) + error("trying to build an SBT with incompatible signatures.") + error("nums = {}; scaleds = {}", repr(nums), repr(scaleds)) sys.exit(-1) - notify('') + notify("") # did we load any!? 
if n == 0: - error('no signatures found to load into tree!? failing.') + error("no signatures found to load into tree!? failing.") sys.exit(-1) if picklist: @@ -513,8 +556,10 @@ def index(args): def search(args): - from .search import (search_databases_with_flat_query, - search_databases_with_abund_query) + from .search import ( + search_databases_with_flat_query, + search_databases_with_abund_query, + ) set_quiet(args.quiet, args.debug) moltype = sourmash_args.calculate_moltype(args) @@ -522,18 +567,21 @@ def search(args): pattern_search = sourmash_args.load_include_exclude_db_patterns(args) # set up the query. - query = sourmash_args.load_query_signature(args.query, - ksize=args.ksize, - select_moltype=moltype, - select_md5=args.md5) - notify(f'loaded query: {str(query)[:30]}... (k={query.minhash.ksize}, {sourmash_args.get_moltype(query)})') + query = sourmash_args.load_query_signature( + args.query, ksize=args.ksize, select_moltype=moltype, select_md5=args.md5 + ) + notify( + f"loaded query: {str(query)[:30]}... (k={query.minhash.ksize}, {sourmash_args.get_moltype(query)})" + ) if args.scaled: if not query.minhash.scaled: - error('cannot downsample a signature not created with --scaled') + error("cannot downsample a signature not created with --scaled") sys.exit(-1) if args.scaled != query.minhash.scaled: - notify(f'downsampling query from scaled={query.minhash.scaled} to {int(args.scaled)}') + notify( + f"downsampling query from scaled={query.minhash.scaled} to {int(args.scaled)}" + ) with query.update() as query: query.minhash = query.minhash.downsample(scaled=args.scaled) @@ -544,11 +592,14 @@ def search(args): notify("ERROR: cannot specify both --containment and --max-containment!") sys.exit(-1) - databases = sourmash_args.load_dbs_and_sigs(args.databases, query, - not is_containment, - picklist=picklist, - pattern=pattern_search, - fail_on_empty_database=args.fail_on_empty_database) + databases = sourmash_args.load_dbs_and_sigs( + args.databases, + query, + not is_containment, + picklist=picklist, + pattern=pattern_search, + fail_on_empty_database=args.fail_on_empty_database, + ) # handle signatures with abundance if query.minhash.track_abundance: @@ -559,7 +610,9 @@ def search(args): query.minhash = query.minhash.flatten() elif args.containment or args.max_containment: # abund sketch + keep abundance => no containment searches - notify("ERROR: cannot do containment searches on an abund signature; maybe specify --ignore-abundance?") + notify( + "ERROR: cannot do containment searches on an abund signature; maybe specify --ignore-abundance?" 
+ ) sys.exit(-1) else: # forcibly ignore abundances if query has no abundances @@ -568,32 +621,40 @@ def search(args): # do the actual search if query.minhash.track_abundance: try: - results = search_databases_with_abund_query(query, databases, - threshold=args.threshold, - do_containment=args.containment, - do_max_containment=args.max_containment, - best_only=args.best_only, - unload_data=True) + results = search_databases_with_abund_query( + query, + databases, + threshold=args.threshold, + do_containment=args.containment, + do_max_containment=args.max_containment, + best_only=args.best_only, + unload_data=True, + ) except TypeError as exc: error(f"ERROR: {str(exc)}") sys.exit(-1) else: - results = search_databases_with_flat_query(query, databases, - threshold=args.threshold, - do_containment=args.containment, - do_max_containment=args.max_containment, - best_only=args.best_only, - unload_data=True, - estimate_ani_ci=args.estimate_ani_ci) + results = search_databases_with_flat_query( + query, + databases, + threshold=args.threshold, + do_containment=args.containment, + do_max_containment=args.max_containment, + best_only=args.best_only, + unload_data=True, + estimate_ani_ci=args.estimate_ani_ci, + ) n_matches = len(results) if args.best_only: args.num_results = 1 if not args.num_results or n_matches <= args.num_results: - print_results(f'{len(results)} matches above threshold {args.threshold:0.3f}:') + print_results(f"{len(results)} matches above threshold {args.threshold:0.3f}:") else: - print_results(f'{len(results)} matches above threshold {args.threshold:0.3f}; showing first {args.num_results}:') + print_results( + f"{len(results)} matches above threshold {args.threshold:0.3f}; showing first {args.num_results}:" + ) n_matches = args.num_results @@ -604,9 +665,9 @@ def search(args): print_results("similarity match") print_results("---------- -----") for sr in results[:n_matches]: - pct = '{:.1f}%'.format(sr.similarity*100) + pct = f"{sr.similarity * 100:.1f}%" name = sr.match._display_name(60) - print_results('{:>6} {}', pct, name) + print_results("{:>6} {}", pct, name) if sr.cmp_scaled is not None: if not size_may_be_inaccurate and sr.size_may_be_inaccurate: size_may_be_inaccurate = True @@ -637,9 +698,13 @@ def search(args): sourmash_args.report_picklist(args, picklist) if size_may_be_inaccurate: - notify("WARNING: size estimation for at least one of these sketches may be inaccurate. ANI values will not be reported for these comparisons.") + notify( + "WARNING: size estimation for at least one of these sketches may be inaccurate. ANI values will not be reported for these comparisons." + ) if jaccard_ani_untrustworthy: - notify("WARNING: Jaccard estimation for at least one of these comparisons is likely inaccurate. Could not estimate ANI for these comparisons.") + notify( + "WARNING: Jaccard estimation for at least one of these comparisons is likely inaccurate. Could not estimate ANI for these comparisons." 
+ ) def categorize(args): @@ -653,7 +718,7 @@ def categorize(args): # eliminate names we've already categorized already_names = set() if args.load_csv: - with open(args.load_csv, newline='') as fp: + with open(args.load_csv, newline="") as fp: r = csv.reader(fp) for row in r: already_names.add(row[0]) @@ -668,13 +733,12 @@ def _yield_all_sigs(queries, ksize, moltype): for filename in queries: mi = MultiIndex.load_from_path(filename, False) mi = mi.select(ksize=ksize, moltype=moltype) - for ss, loc in mi.signatures_with_location(): - yield ss, loc + yield from mi.signatures_with_location() csv_w = None csv_fp = None if args.csv: - csv_fp = open(args.csv, 'w', newline='') + csv_fp = open(args.csv, "w", newline="") csv_w = csv.writer(csv_fp) search_obj = make_jaccard_search_query(threshold=args.threshold) @@ -683,7 +747,9 @@ def _yield_all_sigs(queries, ksize, moltype): if loc in already_names: continue - notify(f'loaded query: {str(orig_query)[:30]}... (k={orig_query.minhash.ksize}, {orig_query.minhash.moltype})') + notify( + f"loaded query: {str(orig_query)[:30]}... (k={orig_query.minhash.ksize}, {orig_query.minhash.moltype})" + ) if args.ignore_abundance and orig_query.minhash.track_abundance: query = orig_query.copy() @@ -691,7 +757,9 @@ def _yield_all_sigs(queries, ksize, moltype): query.minhash = query.minhash.flatten() else: if orig_query.minhash.track_abundance: - notify("ERROR: this search cannot be done on signatures calculated with abundance.") + notify( + "ERROR: this search cannot be done on signatures calculated with abundance." + ) notify("ERROR: please specify --ignore-abundance.") sys.exit(-1) @@ -700,19 +768,18 @@ def _yield_all_sigs(queries, ksize, moltype): results = [] for sr in db.find(search_obj, query): match = sr.signature - if match.md5sum() != query.md5sum(): # ignore self. + if match.md5sum() != query.md5sum(): # ignore self. results.append((orig_query.similarity(match), match)) if results: - results.sort(key=lambda x: -x[0]) # reverse sort on similarity + results.sort(key=lambda x: -x[0]) # reverse sort on similarity best_hit_sim, best_hit_query = results[0] - notify(f'for {query}, found: {best_hit_sim:.2f} {best_hit_query}') + notify(f"for {query}, found: {best_hit_sim:.2f} {best_hit_query}") best_hit_query_name = best_hit_query.name if csv_w: - csv_w.writerow([loc, query, best_hit_query_name, - best_hit_sim]) + csv_w.writerow([loc, query, best_hit_query_name, best_hit_sim]) else: - notify(f'for {query}, no match found') + notify(f"for {query}, no match found") if csv_fp: csv_fp.close() @@ -727,43 +794,49 @@ def gather(args): pattern_search = sourmash_args.load_include_exclude_db_patterns(args) # load the query signature & figure out all the things - query = sourmash_args.load_query_signature(args.query, - ksize=args.ksize, - select_moltype=moltype, - select_md5=args.md5) - notify(f'loaded query: {str(query)[:30]}... (k={query.minhash.ksize}, {sourmash_args.get_moltype(query)})') + query = sourmash_args.load_query_signature( + args.query, ksize=args.ksize, select_moltype=moltype, select_md5=args.md5 + ) + notify( + f"loaded query: {str(query)[:30]}... (k={query.minhash.ksize}, {sourmash_args.get_moltype(query)})" + ) # verify signature was computed right. 
if not query.minhash.scaled: - error('query signature needs to be created with --scaled') + error("query signature needs to be created with --scaled") sys.exit(-1) if args.scaled and args.scaled != query.minhash.scaled: - notify(f'downsampling query from scaled={query.minhash.scaled} to {int(args.scaled)}') + notify( + f"downsampling query from scaled={query.minhash.scaled} to {int(args.scaled)}" + ) with query.update() as query: query.minhash = query.minhash.downsample(scaled=args.scaled) # empty? if not len(query.minhash): - error('no query hashes!? exiting.') + error("no query hashes!? exiting.") sys.exit(-1) # set up the search databases cache_size = args.cache_size if args.cache_size == 0: cache_size = None - databases = sourmash_args.load_dbs_and_sigs(args.databases, query, False, - cache_size=cache_size, - picklist=picklist, - pattern=pattern_search, - fail_on_empty_database=args.fail_on_empty_database) - - - if args.linear: # force linear traversal? - databases = [ LazyLinearIndex(db) for db in databases ] + databases = sourmash_args.load_dbs_and_sigs( + args.databases, + query, + False, + cache_size=cache_size, + picklist=picklist, + pattern=pattern_search, + fail_on_empty_database=args.fail_on_empty_database, + ) + + if args.linear: # force linear traversal? + databases = [LazyLinearIndex(db) for db in databases] size_may_be_inaccurate = False - if args.prefetch: # note: on by default! + if args.prefetch: # note: on by default! notify("Starting prefetch sweep across databases.") prefetch_query = query.copy() if prefetch_query.minhash.track_abundance: @@ -800,14 +873,21 @@ def gather(args): ident_mh.add_many(union_found) noident_mh.remove_many(union_found) - # optionally calculate and output prefetch info to csv + # optionally calculate and output prefetch info to csv if prefetch_csvout_fp: for found_sig in counter.signatures(): # calculate intersection stats and info - prefetch_result = PrefetchResult(prefetch_query, found_sig, cmp_scaled=scaled, - threshold_bp=args.threshold_bp, estimate_ani_ci=args.estimate_ani_ci) + prefetch_result = PrefetchResult( + prefetch_query, + found_sig, + cmp_scaled=scaled, + threshold_bp=args.threshold_bp, + estimate_ani_ci=args.estimate_ani_ci, + ) if prefetch_csvout_w is None: - prefetch_csvout_w = prefetch_result.init_dictwriter(prefetch_csvout_fp) + prefetch_csvout_w = prefetch_result.init_dictwriter( + prefetch_csvout_fp + ) prefetch_result.write(prefetch_csvout_w) counters.append(counter) @@ -817,7 +897,9 @@ def gather(args): prefetch_csvout_fp.flush() display_bp = format_bp(args.threshold_bp) - notify(f"Prefetch found {len(save_prefetch)} signatures with overlap >= {display_bp}.") + notify( + f"Prefetch found {len(save_prefetch)} signatures with overlap >= {display_bp}." 
+ ) save_prefetch.close() if prefetch_csvout_fp: prefetch_csvout_fp.close() @@ -831,20 +913,22 @@ def gather(args): notify("Doing gather to generate minimum metagenome cover.") found = 0 - weighted_missed = 1 is_abundance = query.minhash.track_abundance and not args.ignore_abundance orig_query_mh = query.minhash if not orig_query_mh.size_is_accurate(): size_may_be_inaccurate = True - gather_iter = GatherDatabases(query, counters, - threshold_bp=args.threshold_bp, - ignore_abundance=args.ignore_abundance, - noident_mh=noident_mh, - ident_mh=ident_mh, - estimate_ani_ci=args.estimate_ani_ci) + gather_iter = GatherDatabases( + query, + counters, + threshold_bp=args.threshold_bp, + ignore_abundance=args.ignore_abundance, + noident_mh=noident_mh, + ident_mh=ident_mh, + estimate_ani_ci=args.estimate_ani_ci, + ) screen_width = _get_screen_width() - sum_f_uniq_found = 0. + sum_f_uniq_found = 0.0 result = None ### open output handles as needed for (1) saving CSV (2) saving matches @@ -867,7 +951,7 @@ def gather(args): found += 1 sum_f_uniq_found += result.f_unique_to_query - if found == 1: # first result? print header. + if found == 1: # first result? print header. if is_abundance: print_results("") print_results("overlap p_query p_match avg_abund") @@ -877,22 +961,30 @@ def gather(args): print_results("overlap p_query p_match") print_results("--------- ------- -------") - # print interim result & save in `found` list for later use - pct_query = '{:.1f}%'.format(result.f_unique_weighted*100) - pct_genome = '{:.1f}%'.format(result.f_match*100) + pct_query = f"{result.f_unique_weighted * 100:.1f}%" + pct_genome = f"{result.f_match * 100:.1f}%" if is_abundance: name = result.match._display_name(screen_width - 41) - average_abund ='{:.1f}'.format(result.average_abund) - print_results('{:9} {:>7} {:>7} {:>9} {}', - format_bp(result.intersect_bp), pct_query, pct_genome, - average_abund, name) + average_abund = f"{result.average_abund:.1f}" + print_results( + "{:9} {:>7} {:>7} {:>9} {}", + format_bp(result.intersect_bp), + pct_query, + pct_genome, + average_abund, + name, + ) else: name = result.match._display_name(screen_width - 31) - print_results('{:9} {:>7} {:>7} {}', - format_bp(result.intersect_bp), pct_query, pct_genome, - name) + print_results( + "{:9} {:>7} {:>7} {}", + format_bp(result.intersect_bp), + pct_query, + pct_genome, + name, + ) # write out CSV if args.output: @@ -915,13 +1007,15 @@ def gather(args): # report on thresholding - if gather_iter.query: # if still a query, then we failed the threshold. - notify(f'found less than {format_bp(args.threshold_bp)} in common. => exiting') + notify(f"found less than {format_bp(args.threshold_bp)} in common. => exiting") # basic reporting: if found: - print_results(f'\nfound {found} matches total;') + print_results(f"\nfound {found} matches total;") if found == args.num_results: - print_results(f'(truncated gather because --num-results={args.num_results})') + print_results( + f"(truncated gather because --num-results={args.num_results})" + ) else: display_bp = format_bp(args.threshold_bp) notify(f"\nNo matches found for --threshold-bp at {display_bp}.") @@ -930,13 +1024,19 @@ def gather(args): if is_abundance and result: p_covered = result.sum_weighted_found / result.total_weighted_hashes p_covered *= 100 - print_results(f'the recovered matches hit {p_covered:.1f}% of the abundance-weighted query.') + print_results( + f"the recovered matches hit {p_covered:.1f}% of the abundance-weighted query." 
+ ) - print_results(f'the recovered matches hit {sum_f_uniq_found*100:.1f}% of the query k-mers (unweighted).') + print_results( + f"the recovered matches hit {sum_f_uniq_found*100:.1f}% of the query k-mers (unweighted)." + ) - print_results('') + print_results("") if gather_iter.scaled != query.minhash.scaled: - print_results(f'WARNING: final scaled was {gather_iter.scaled}, vs query scaled of {query.minhash.scaled}') + print_results( + f"WARNING: final scaled was {gather_iter.scaled}, vs query scaled of {query.minhash.scaled}" + ) # save CSV? if (found and args.output) or args.create_empty_results: @@ -947,7 +1047,7 @@ def gather(args): if args.output_unassigned: remaining_query = gather_iter.query if not (remaining_query.minhash or noident_mh): - notify('no unassigned hashes to save with --output-unassigned!') + notify("no unassigned hashes to save with --output-unassigned!") else: notify(f"saving unassigned hashes to '{args.output_unassigned}'") @@ -967,7 +1067,9 @@ def gather(args): sourmash_args.report_picklist(args, picklist) if size_may_be_inaccurate: - notify("WARNING: size estimation for at least one of these sketches may be inaccurate. ANI values will not be reported for these comparisons.") + notify( + "WARNING: size estimation for at least one of these sketches may be inaccurate. ANI values will not be reported for these comparisons." + ) # DONE w/gather function. @@ -979,11 +1081,11 @@ def multigather(args): moltype = sourmash_args.calculate_moltype(args) if not args.db: - error('Error! must specify at least one database with --db') + error("Error! must specify at least one database with --db") sys.exit(-1) if not args.query and not args.query_from_file: - error('Error! must specify at least one query signature with --query') + error("Error! must specify at least one query signature with --query") sys.exit(-1) # flatten --db and --query @@ -994,36 +1096,49 @@ def multigather(args): inp_files.extend(more_files) # need a query to get ksize, moltype for db loading - query = next(iter(sourmash_args.load_file_as_signatures(inp_files[0], ksize=args.ksize, select_moltype=moltype))) - - notify(f'loaded first query: {str(query)[:30]}... (k={query.minhash.ksize}, {sourmash_args.get_moltype(query)})') - - databases = sourmash_args.load_dbs_and_sigs(args.db, query, False, - fail_on_empty_database=args.fail_on_empty_database) + query = next( + iter( + sourmash_args.load_file_as_signatures( + inp_files[0], ksize=args.ksize, select_moltype=moltype + ) + ) + ) + + notify( + f"loaded first query: {str(query)[:30]}... (k={query.minhash.ksize}, {sourmash_args.get_moltype(query)})" + ) + + databases = sourmash_args.load_dbs_and_sigs( + args.db, query, False, fail_on_empty_database=args.fail_on_empty_database + ) # run gather on all the queries. - n=0 + n = 0 size_may_be_inaccurate = False for queryfile in inp_files: # load the query signature(s) & figure out all the things - for query in sourmash_args.load_file_as_signatures(queryfile, - ksize=args.ksize, - select_moltype=moltype): - notify(f'loaded query: {str(query)[:30]}... (k={query.minhash.ksize}, {sourmash_args.get_moltype(query)})') + for query in sourmash_args.load_file_as_signatures( + queryfile, ksize=args.ksize, select_moltype=moltype + ): + notify( + f"loaded query: {str(query)[:30]}... (k={query.minhash.ksize}, {sourmash_args.get_moltype(query)})" + ) # verify signature was computed right. 
if not query.minhash.scaled: - error('query signature needs to be created with --scaled; skipping') + error("query signature needs to be created with --scaled; skipping") continue if args.scaled and args.scaled != query.minhash.scaled: - notify(f'downsampling query from scaled={query.minhash.scaled} to {int(args.scaled)}') + notify( + f"downsampling query from scaled={query.minhash.scaled} to {int(args.scaled)}" + ) with query.update() as query: query.minhash = query.minhash.downsample(scaled=args.scaled) # empty? if not len(query.minhash): - error('no query hashes!? skipping to next..') + error("no query hashes!? skipping to next..") continue counters = [] @@ -1050,17 +1165,19 @@ def multigather(args): ident_mh.add_many(union_found) found = 0 - weighted_missed = 1 is_abundance = query.minhash.track_abundance and not args.ignore_abundance orig_query_mh = query.minhash - gather_iter = GatherDatabases(query, counters, - threshold_bp=args.threshold_bp, - ignore_abundance=args.ignore_abundance, - noident_mh=noident_mh, - ident_mh=ident_mh) + gather_iter = GatherDatabases( + query, + counters, + threshold_bp=args.threshold_bp, + ignore_abundance=args.ignore_abundance, + noident_mh=noident_mh, + ident_mh=ident_mh, + ) screen_width = _get_screen_width() - sum_f_uniq_found = 0. + sum_f_uniq_found = 0.0 result = None query_filename = query.filename @@ -1071,9 +1188,9 @@ def multigather(args): output_base = os.path.basename(query_filename) if args.output_dir: output_base = os.path.join(args.output_dir, output_base) - output_csv = output_base + '.csv' + output_csv = output_base + ".csv" - output_matches = output_base + '.matches.sig' + output_matches = output_base + ".matches.sig" save_sig_obj = SaveSignaturesToLocation(output_matches) save_sig = save_sig_obj.__enter__() notify(f"saving all matching signatures to '{output_matches}'") @@ -1087,7 +1204,7 @@ def multigather(args): for result in gather_iter: found += 1 sum_f_uniq_found += result.f_unique_to_query - if found == 1: # first result? print header. + if found == 1: # first result? print header. if is_abundance: print_results("") print_results("overlap p_query p_match avg_abund") @@ -1097,22 +1214,30 @@ def multigather(args): print_results("overlap p_query p_match") print_results("--------- ------- -------") - # print interim result & save in a list for later use - pct_query = '{:.1f}%'.format(result.f_unique_weighted*100) - pct_genome = '{:.1f}%'.format(result.f_match*100) + pct_query = f"{result.f_unique_weighted * 100:.1f}%" + pct_genome = f"{result.f_match * 100:.1f}%" if is_abundance: name = result.match._display_name(screen_width - 41) - average_abund ='{:.1f}'.format(result.average_abund) - print_results('{:9} {:>7} {:>7} {:>9} {}', - format_bp(result.intersect_bp), pct_query, pct_genome, - average_abund, name) + average_abund = f"{result.average_abund:.1f}" + print_results( + "{:9} {:>7} {:>7} {:>9} {}", + format_bp(result.intersect_bp), + pct_query, + pct_genome, + average_abund, + name, + ) else: name = result.match._display_name(screen_width - 31) - print_results('{:9} {:>7} {:>7} {}', - format_bp(result.intersect_bp), pct_query, pct_genome, - name) + print_results( + "{:9} {:>7} {:>7} {}", + format_bp(result.intersect_bp), + pct_query, + pct_genome, + name, + ) ## @CTB if csv_writer is None: @@ -1128,10 +1253,12 @@ def multigather(args): # report on thresholding - if gather_iter.query.minhash: # if still a query, then we failed the threshold. - notify(f'found less than {format_bp(args.threshold_bp)} in common. 
=> exiting') + notify( + f"found less than {format_bp(args.threshold_bp)} in common. => exiting" + ) # basic reporting - print_results('\nfound {} matches total;', found) + print_results("\nfound {} matches total;", found) # close saving etc. save_sig_obj.close() @@ -1143,17 +1270,21 @@ def multigather(args): if is_abundance and result: p_covered = result.sum_weighted_found / result.total_weighted_hashes p_covered *= 100 - print_results(f'the recovered matches hit {p_covered:.1f}% of the abundance-weighted query.') + print_results( + f"the recovered matches hit {p_covered:.1f}% of the abundance-weighted query." + ) - print_results(f'the recovered matches hit {sum_f_uniq_found*100:.1f}% of the query k-mers (unweighted).') - print_results('') + print_results( + f"the recovered matches hit {sum_f_uniq_found*100:.1f}% of the query k-mers (unweighted)." + ) + print_results("") if found == 0: - notify('nothing found... skipping.') + notify("nothing found... skipping.") continue - output_unassigned = output_base + '.unassigned.sig' - with open(output_unassigned, 'wt') as fp: + output_unassigned = output_base + ".unassigned.sig" + with open(output_unassigned, "w"): remaining_query = gather_iter.query if noident_mh: remaining_mh = remaining_query.minhash.to_mutable() @@ -1165,9 +1296,9 @@ def multigather(args): remaining_query.minhash = abund_query_mh if found == 0: - notify('nothing found - entire query signature unassigned.') + notify("nothing found - entire query signature unassigned.") elif not remaining_query: - notify('no unassigned hashes! not saving.') + notify("no unassigned hashes! not saving.") else: notify(f'saving unassigned hashes to "{output_unassigned}"') @@ -1177,9 +1308,11 @@ def multigather(args): n += 1 # fini, next query! - notify(f'\nconducted gather searches on {n} signatures') + notify(f"\nconducted gather searches on {n} signatures") if size_may_be_inaccurate: - notify("WARNING: size estimation for at least one of these sketches may be inaccurate. ANI values will not be reported for these comparisons.") + notify( + "WARNING: size estimation for at least one of these sketches may be inaccurate. ANI values will not be reported for these comparisons." 
+ ) def watch(args): @@ -1187,7 +1320,7 @@ def watch(args): set_quiet(args.quiet) if args.input_is_protein and args.dna: - notify('WARNING: input is protein, turning off nucleotide hashing.') + notify("WARNING: input is protein, turning off nucleotide hashing.") args.dna = False args.protein = True @@ -1195,22 +1328,22 @@ def watch(args): notify('ERROR: cannot use "watch" with both nucleotide and protein.') if args.dna: - moltype = 'DNA' + moltype = "DNA" is_protein = False dayhoff = False hp = False elif args.protein: - moltype = 'protein' + moltype = "protein" is_protein = True dayhoff = False hp = False elif args.dayhoff: - moltype = 'dayhoff' + moltype = "dayhoff" is_protein = True dayhoff = True hp = False else: - moltype = 'hp' + moltype = "hp" is_protein = True dayhoff = False hp = True @@ -1224,23 +1357,27 @@ def watch(args): tree_mh = leaf.data.minhash ksize = tree_mh.ksize - E = MinHash(ksize=ksize, n=args.num_hashes, is_protein=is_protein, dayhoff=dayhoff, hp=hp) + E = MinHash( + ksize=ksize, n=args.num_hashes, is_protein=is_protein, dayhoff=dayhoff, hp=hp + ) - notify(f'Computing signature for k={ksize}, {moltype} from stdin') + notify(f"Computing signature for k={ksize}, {moltype} from stdin") def do_search(): results = [] - streamsig = sig.SourmashSignature(E, filename='stdin', name=args.name) - for similarity, match, _ in tree.search(streamsig, - threshold=args.threshold, - best_only=True, - ignore_abundance=True, - do_containment=False): + streamsig = sig.SourmashSignature(E, filename="stdin", name=args.name) + for similarity, match, _ in tree.search( + streamsig, + threshold=args.threshold, + best_only=True, + ignore_abundance=True, + do_containment=False, + ): results.append((similarity, match)) return results - notify('reading sequences from stdin') + notify("reading sequences from stdin") watermark = WATERMARK_SIZE # iterate over input records @@ -1249,7 +1386,7 @@ def do_search(): for n, record in enumerate(screed_iter): # at each watermark, print status & check cardinality if n >= watermark: - notify(f'\r... read {n} sequences', end='') + notify(f"\r... read {n} sequences", end="") watermark += WATERMARK_SIZE if do_search(): @@ -1262,16 +1399,15 @@ def do_search(): results = do_search() if not results: - notify(f'... read {n} sequences, no matches found.') + notify(f"... read {n} sequences, no matches found.") else: - results.sort(key=lambda x: -x[0]) # take best + results.sort(key=lambda x: -x[0]) # take best similarity, found_sig = results[0] - print_results('FOUND: {}, at {:.3f}', found_sig, - similarity) + print_results("FOUND: {}, at {:.3f}", found_sig, similarity) if args.output: notify(f"saving signature to '{args.output}'") - streamsig = sig.SourmashSignature(E, filename='stdin', name=args.name) + streamsig = sig.SourmashSignature(E, filename="stdin", name=args.name) with SaveSignaturesToLocation(args.output) as save_sig: save_sig.add(streamsig) @@ -1296,9 +1432,15 @@ def prefetch(args): notify("ERROR: no databases or signatures to search!?") sys.exit(-1) - if not (args.save_unmatched_hashes or args.save_matching_hashes or - args.save_matches or args.output): - notify("WARNING: no output(s) specified! Nothing will be saved from this prefetch!") + if not ( + args.save_unmatched_hashes + or args.save_matching_hashes + or args.save_matches + or args.output + ): + notify( + "WARNING: no output(s) specified! Nothing will be saved from this prefetch!" 
+ ) # figure out what k-mer size and molecule type we're looking for here ksize = args.ksize @@ -1307,15 +1449,16 @@ def prefetch(args): pattern_search = sourmash_args.load_include_exclude_db_patterns(args) # load the query signature & figure out all the things - query = sourmash_args.load_query_signature(args.query, - ksize=args.ksize, - select_moltype=moltype, - select_md5=args.md5) - notify(f'loaded query: {str(query)[:30]}... (k={query.minhash.ksize}, {sourmash_args.get_moltype(query)})') + query = sourmash_args.load_query_signature( + args.query, ksize=args.ksize, select_moltype=moltype, select_md5=args.md5 + ) + notify( + f"loaded query: {str(query)[:30]}... (k={query.minhash.ksize}, {sourmash_args.get_moltype(query)})" + ) # verify signature was computed with scaled. if not query.minhash.scaled: - error('query signature needs to be created with --scaled') + error("query signature needs to be created with --scaled") sys.exit(-1) # if with track_abund, flatten me @@ -1325,15 +1468,19 @@ def prefetch(args): query_mh = query_mh.flatten() if args.scaled and args.scaled != query_mh.scaled: - notify(f'downsampling query from scaled={query_mh.scaled} to {int(args.scaled)}') + notify( + f"downsampling query from scaled={query_mh.scaled} to {int(args.scaled)}" + ) query_mh = query_mh.downsample(scaled=args.scaled) - notify(f"query sketch has scaled={query_mh.scaled}; will be dynamically downsampled as needed.") + notify( + f"query sketch has scaled={query_mh.scaled}; will be dynamically downsampled as needed." + ) common_scaled = query_mh.scaled # empty? if not len(query_mh): - error('no query hashes!? exiting.') + error("no query hashes!? exiting.") sys.exit(-1) with query.update() as query: @@ -1357,12 +1504,12 @@ def prefetch(args): ident_mh = query_mh.copy_and_clear() noident_mh = query_mh.to_mutable() - did_a_search = False # track whether we did _any_ search at all! + did_a_search = False # track whether we did _any_ search at all! 
size_may_be_inaccurate = False total_signatures_loaded = 0 sum_signatures_after_select = 0 for dbfilename in args.databases: - notify(f"loading signatures from '{dbfilename}'", end='\r') + notify(f"loading signatures from '{dbfilename}'", end="\r") db = sourmash_args.load_file_as_index(dbfilename) total_signatures_loaded += len(db) @@ -1371,24 +1518,25 @@ def prefetch(args): if args.linear: db = LazyLinearIndex(db) - db = db.select(ksize=ksize, moltype=moltype, - containment=True, scaled=True) + db = db.select(ksize=ksize, moltype=moltype, containment=True, scaled=True) sum_signatures_after_select += len(db) - db = sourmash_args.apply_picklist_and_pattern(db, picklist, - pattern_search) + db = sourmash_args.apply_picklist_and_pattern(db, picklist, pattern_search) if not db: notify(f"...no compatible signatures in '{dbfilename}'; skipping") continue - for result in prefetch_database(query, db, args.threshold_bp, estimate_ani_ci= args.estimate_ani_ci): + for result in prefetch_database( + query, db, args.threshold_bp, estimate_ani_ci=args.estimate_ani_ci + ): match = result.match # ensure we're all on the same page wrt scaled resolution: - common_scaled = max(match.minhash.scaled, query.minhash.scaled, - common_scaled) + common_scaled = max( + match.minhash.scaled, query.minhash.scaled, common_scaled + ) query_mh = query.minhash.downsample(scaled=common_scaled) match_mh = match.minhash.downsample(scaled=common_scaled) @@ -1412,8 +1560,10 @@ def prefetch(args): matches_out.add(match) if matches_out.count % 10 == 0: - notify(f"total of {matches_out.count} matching signatures so far.", - end="\r") + notify( + f"total of {matches_out.count} matching signatures so far.", + end="\r", + ) # keep track of inaccurate size estimation if not size_may_be_inaccurate and result.size_may_be_inaccurate: @@ -1429,11 +1579,17 @@ def prefetch(args): del db notify("--") - notify(f"loaded {total_signatures_loaded} total signatures from {len(args.databases)} locations.") - notify(f"after selecting signatures compatible with search, {sum_signatures_after_select} remain.") + notify( + f"loaded {total_signatures_loaded} total signatures from {len(args.databases)} locations." + ) + notify( + f"after selecting signatures compatible with search, {sum_signatures_after_select} remain." + ) if not did_a_search: - notify("ERROR in prefetch: after picklists and patterns, no signatures to search!?") + notify( + "ERROR in prefetch: after picklists and patterns, no signatures to search!?" + ) sys.exit(-1) notify("--") @@ -1445,7 +1601,9 @@ def prefetch(args): csvout_fp.close() assert len(query_mh) == len(ident_mh) + len(noident_mh) - notify(f"of {len(query_mh)} distinct query hashes, {len(ident_mh)} were found in matches above threshold.") + notify( + f"of {len(query_mh)} distinct query hashes, {len(ident_mh)} were found in matches above threshold." 
+ ) notify(f"a total of {len(noident_mh)} query hashes remain unmatched.") notify(f"final scaled value (max across query and all matches) is {common_scaled}") @@ -1453,7 +1611,7 @@ def prefetch(args): filename = args.save_matching_hashes notify(f"saving {len(ident_mh)} matched hashes to '{filename}'") - sig_name = '' + sig_name = "" if query.name: sig_name = f"{query.name}-known" @@ -1468,7 +1626,7 @@ def prefetch(args): if args.save_unmatched_hashes: filename = args.save_unmatched_hashes - sig_name = '' + sig_name = "" if query.name: sig_name = f"{query.name}-unknown" @@ -1486,6 +1644,8 @@ def prefetch(args): sourmash_args.report_picklist(args, picklist) if size_may_be_inaccurate: - notify("WARNING: size estimation for at least one of these sketches may be inaccurate. ANI values will not be reported for these comparisons.") + notify( + "WARNING: size estimation for at least one of these sketches may be inaccurate. ANI values will not be reported for these comparisons." + ) return 0 diff --git a/src/sourmash/compare.py b/src/sourmash/compare.py index 35b8639cb5..85928dc8a4 100644 --- a/src/sourmash/compare.py +++ b/src/sourmash/compare.py @@ -39,22 +39,28 @@ def compare_serial(siglist, ignore_abundance, *, downsample=False, return_ani=Fa for i, j in iterator: if return_ani: - ani_result = siglist[i].jaccard_ani(siglist[j],downsample=downsample) + ani_result = siglist[i].jaccard_ani(siglist[j], downsample=downsample) if not potential_false_negatives and ani_result.p_exceeds_threshold: potential_false_negatives = True if not jaccard_ani_untrustworthy and ani_result.je_exceeds_threshold: jaccard_ani_untrustworthy = True ani = ani_result.ani - if ani == None: + if ani is None: ani = 0.0 similarities[i][j] = similarities[j][i] = ani else: - similarities[i][j] = similarities[j][i] = siglist[i].similarity(siglist[j], ignore_abundance=ignore_abundance, downsample=downsample) + similarities[i][j] = similarities[j][i] = siglist[i].similarity( + siglist[j], ignore_abundance=ignore_abundance, downsample=downsample + ) if jaccard_ani_untrustworthy: - notify("WARNING: Jaccard estimation for at least one of these comparisons is likely inaccurate. Could not estimate ANI for these comparisons.") + notify( + "WARNING: Jaccard estimation for at least one of these comparisons is likely inaccurate. Could not estimate ANI for these comparisons." + ) if potential_false_negatives: - notify("WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this.") + notify( + "WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this." 
+ ) return similarities @@ -78,19 +84,24 @@ def compare_serial_containment(siglist, *, downsample=False, return_ani=False): if i == j: containments[i][j] = 1 elif return_ani: - ani_result = siglist[j].containment_ani(siglist[i], downsample=downsample) + ani_result = siglist[j].containment_ani( + siglist[i], downsample=downsample + ) ani = ani_result.ani if not potential_false_negatives and ani_result.p_exceeds_threshold: potential_false_negatives = True - if ani == None: + if ani is None: ani = 0.0 containments[i][j] = ani else: - containments[i][j] = siglist[j].contained_by(siglist[i], - downsample=downsample) + containments[i][j] = siglist[j].contained_by( + siglist[i], downsample=downsample + ) if potential_false_negatives: - notify("WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this.") + notify( + "WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this." + ) return containments @@ -115,18 +126,23 @@ def compare_serial_max_containment(siglist, *, downsample=False, return_ani=Fals for i, j in iterator: if return_ani: - ani_result = siglist[j].max_containment_ani(siglist[i], downsample=downsample) + ani_result = siglist[j].max_containment_ani( + siglist[i], downsample=downsample + ) ani = ani_result.ani if not potential_false_negatives and ani_result.p_exceeds_threshold: potential_false_negatives = True - if ani == None: + if ani is None: ani = 0.0 containments[i][j] = containments[j][i] = ani else: - containments[i][j] = containments[j][i] = siglist[j].max_containment(siglist[i], - downsample=downsample) + containments[i][j] = containments[j][i] = siglist[j].max_containment( + siglist[i], downsample=downsample + ) if potential_false_negatives: - notify("WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this.") + notify( + "WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this." + ) return containments @@ -153,17 +169,20 @@ def compare_serial_avg_containment(siglist, *, downsample=False, return_ani=Fals if return_ani: cmp = FracMinHashComparison(siglist[j].minhash, siglist[i].minhash) ani = cmp.avg_containment_ani - if ani == None: + if ani is None: ani = 0.0 if not potential_false_negatives and cmp.potential_false_negative: potential_false_negatives = True containments[i][j] = containments[j][i] = ani else: - containments[i][j] = containments[j][i] = siglist[j].avg_containment(siglist[i], - downsample=downsample) + containments[i][j] = containments[j][i] = siglist[j].avg_containment( + siglist[i], downsample=downsample + ) if potential_false_negatives: - notify("WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this.") + notify( + "WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this." 
+ ) return containments @@ -174,16 +193,18 @@ def similarity_args_unpack(args, ignore_abundance, *, downsample, return_ani=Fal sig1, sig2 = args if return_ani: ani = sig1.jaccard_ani(sig2, downsample=downsample).ani - if ani == None: + if ani is None: ani = 0.0 return ani else: - return sig1.similarity(sig2, - ignore_abundance=ignore_abundance, - downsample=downsample) + return sig1.similarity( + sig2, ignore_abundance=ignore_abundance, downsample=downsample + ) -def get_similarities_at_index(index, ignore_abundance, downsample, siglist, *, return_ani=False): +def get_similarities_at_index( + index, ignore_abundance, downsample, siglist, *, return_ani=False +): """Returns similarities of all the combinations of signature at index in the siglist with the rest of the indices starting at index + 1. Doesn't redundantly calculate signatures with all the other indices prior to @@ -202,18 +223,24 @@ def get_similarities_at_index(index, ignore_abundance, downsample, siglist, *, r with rest of the signatures from index+1 """ startt = time.time() - sig_iterator = itertools.product([siglist[index]], siglist[index + 1:]) - func = partial(similarity_args_unpack, - ignore_abundance=ignore_abundance, - downsample=downsample, - return_ani=return_ani) + sig_iterator = itertools.product([siglist[index]], siglist[index + 1 :]) + func = partial( + similarity_args_unpack, + ignore_abundance=ignore_abundance, + downsample=downsample, + return_ani=return_ani, + ) similarity_list = list(map(func, sig_iterator)) notify( - f"comparison for index {index} done in {time.time() - startt:.5f} seconds", end='\r') + f"comparison for index {index} done in {time.time() - startt:.5f} seconds", + end="\r", + ) return similarity_list -def compare_parallel(siglist, ignore_abundance, downsample, n_jobs, *, return_ani=False): +def compare_parallel( + siglist, ignore_abundance, downsample, n_jobs, *, return_ani=False +): """Compare all combinations of signatures and return a matrix of similarities. 
Processes combinations parallely on number of processes given by n_jobs @@ -256,7 +283,8 @@ def compare_parallel(siglist, ignore_abundance, downsample, n_jobs, *, return_an siglist=siglist, ignore_abundance=ignore_abundance, downsample=downsample, - return_ani=return_ani) + return_ani=return_ani, + ) notify("Created similarity func") # Initialize multiprocess.pool @@ -279,19 +307,27 @@ def compare_parallel(siglist, ignore_abundance, downsample, n_jobs, *, return_an startt = time.time() col_idx = index + 1 for idx_condensed, item in enumerate(l): - memmap_similarities[index, col_idx + idx_condensed] = memmap_similarities[idx_condensed + col_idx, index] = item + memmap_similarities[index, col_idx + idx_condensed] = memmap_similarities[ + idx_condensed + col_idx, index + ] = item notify( - f"Setting similarities matrix for index {index} done in {time.time() - startt:.5f} seconds", end='\r') + f"Setting similarities matrix for index {index} done in {time.time() - startt:.5f} seconds", + end="\r", + ) notify("Setting similarities completed") pool.close() pool.join() - notify(f"Time taken to compare all pairs parallely is {time.time() - start_initial:.5f} seconds ") + notify( + f"Time taken to compare all pairs parallely is {time.time() - start_initial:.5f} seconds " + ) return np.memmap(filename, dtype=np.float64, shape=(length_siglist, length_siglist)) -def compare_all_pairs(siglist, ignore_abundance, downsample=False, n_jobs=None, return_ani=False): +def compare_all_pairs( + siglist, ignore_abundance, downsample=False, n_jobs=None, return_ani=False +): """Compare all combinations of signatures and return a matrix of similarities. Processes combinations either serially or based on parallely on number of processes given by n_jobs @@ -309,7 +345,14 @@ def compare_all_pairs(siglist, ignore_abundance, downsample=False, n_jobs=None, :return: np.array similarity matrix """ if n_jobs is None or n_jobs == 1: - similarities = compare_serial(siglist, ignore_abundance=ignore_abundance, downsample=downsample, return_ani=return_ani) + similarities = compare_serial( + siglist, + ignore_abundance=ignore_abundance, + downsample=downsample, + return_ani=return_ani, + ) else: - similarities = compare_parallel(siglist, ignore_abundance, downsample, n_jobs, return_ani=return_ani) + similarities = compare_parallel( + siglist, ignore_abundance, downsample, n_jobs, return_ani=return_ani + ) return similarities diff --git a/src/sourmash/distance_utils.py b/src/sourmash/distance_utils.py index 66feb6259c..9106bd8812 100644 --- a/src/sourmash/distance_utils.py +++ b/src/sourmash/distance_utils.py @@ -12,12 +12,14 @@ from .logging import notify + def check_distance(dist): if not 0 <= dist <= 1: raise ValueError(f"Error: distance value {dist :.4f} is not between 0 and 1!") else: return dist + def check_prob_threshold(val, threshold=1e-3): """ Check likelihood of no shared hashes based on chance alone (false neg). 
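The `compare_parallel` hunks above only reflow the memmap bookkeeping, but the pattern is easy to miss in diff form: each worker computes one row tail of similarities, and the parent mirrors it across the diagonal of a disk-backed array. A self-contained toy version of that fill, assuming a float64 memmap; the file name and values here are made up:

```python
import numpy as np

n = 4
sims = np.memmap("similarities.mmap", dtype=np.float64, mode="w+", shape=(n, n))
index = 0
row_tail = [0.9, 0.5, 0.1]  # similarities of siglist[0] vs siglist[1:]
for offset, value in enumerate(row_tail):
    col = index + 1 + offset
    sims[index, col] = sims[col, index] = value  # mirror across the diagonal
sims.flush()
```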
@@ -29,15 +31,18 @@ def check_prob_threshold(val, threshold=1e-3): exceeds_threshold = True return val, exceeds_threshold + def check_jaccard_error(val, threshold=1e-4): exceeds_threshold = False if threshold is not None and val > threshold: exceeds_threshold = True return val, exceeds_threshold + @dataclass class ANIResult: """Base class for distance/ANI from k-mer containment.""" + dist: float p_nothing_in_common: float p_threshold: float = 1e-3 @@ -47,7 +52,9 @@ class ANIResult: def check_dist_and_p_threshold(self): # check values self.dist = check_distance(self.dist) - self.p_nothing_in_common, self.p_exceeds_threshold = check_prob_threshold(self.p_nothing_in_common, self.p_threshold) + self.p_nothing_in_common, self.p_exceeds_threshold = check_prob_threshold( + self.p_nothing_in_common, self.p_threshold + ) def __post_init__(self): self.check_dist_and_p_threshold() @@ -62,6 +69,7 @@ def ani(self): @dataclass class jaccardANIResult(ANIResult): """Class for distance/ANI from jaccard (includes jaccard_error).""" + jaccard_error: float = None je_threshold: float = 1e-4 @@ -70,7 +78,9 @@ def __post_init__(self): self.check_dist_and_p_threshold() # check jaccard error if self.jaccard_error is not None: - self.jaccard_error, self.je_exceeds_threshold = check_jaccard_error(self.jaccard_error, self.je_threshold) + self.jaccard_error, self.je_exceeds_threshold = check_jaccard_error( + self.jaccard_error, self.je_threshold + ) else: raise ValueError("Error: jaccard_error cannot be None.") @@ -89,6 +99,7 @@ class ciANIResult(ANIResult): Set CI defaults to None, just in case CI can't be estimated for given sample. """ + dist_low: float = None dist_high: float = None @@ -128,7 +139,7 @@ def var_n_mutated(L, k, r1, *, q=None): if r1 == 0: return 0.0 r1 = float(r1) - if q == None: # we assume that if q is provided, it is correct for r1 + if q is None: # we assume that if q is provided, it is correct for r1 q = r1_to_q(k, r1) varN = ( L * (1 - q) * (q * (2 * k + (2 / r1) - 1) - 2 * k) @@ -158,7 +169,9 @@ def handle_seqlen_nkmers(ksize, *, sequence_len_bp=None, n_unique_kmers=None): return n_unique_kmers elif sequence_len_bp is None: # both are None, raise ValueError - raise ValueError("Error: distance estimation requires input of either 'sequence_len_bp' or 'n_unique_kmers'") + raise ValueError( + "Error: distance estimation requires input of either 'sequence_len_bp' or 'n_unique_kmers'" + ) else: n_unique_kmers = sequence_len_bp - (ksize - 1) return n_unique_kmers @@ -175,7 +188,7 @@ def set_size_chernoff(set_size, scaled, *, relative_error=0.05): @param relative_error: the desired relative error (defaults to 5%) @return: float (the upper bound probability) """ - upper_bound = 1 - 2 * np.exp(- relative_error**2*set_size/(scaled * 3)) + upper_bound = 1 - 2 * np.exp(-(relative_error**2) * set_size / (scaled * 3)) return upper_bound @@ -190,14 +203,17 @@ def set_size_exact_prob(set_size, scaled, *, relative_error=0.05): @return: float (the upper bound probability) """ # Need to check if the edge case is an integer or not. 
If not, don't include it in the equation - pmf_arg = -set_size/scaled * (relative_error - 1) + pmf_arg = -set_size / scaled * (relative_error - 1) if pmf_arg == int(pmf_arg): - prob = binom.cdf(set_size/scaled * (relative_error + 1), set_size, 1/scaled) - \ - binom.cdf(-set_size/scaled * (relative_error - 1), set_size, 1/scaled) + \ - binom.pmf(-set_size/scaled * (relative_error - 1), set_size, 1/scaled) + prob = ( + binom.cdf(set_size / scaled * (relative_error + 1), set_size, 1 / scaled) + - binom.cdf(-set_size / scaled * (relative_error - 1), set_size, 1 / scaled) + + binom.pmf(-set_size / scaled * (relative_error - 1), set_size, 1 / scaled) + ) else: - prob = binom.cdf(set_size / scaled * (relative_error + 1), set_size, 1 / scaled) - \ - binom.cdf(-set_size / scaled * (relative_error - 1), set_size, 1 / scaled) + prob = binom.cdf( + set_size / scaled * (relative_error + 1), set_size, 1 / scaled + ) - binom.cdf(-set_size / scaled * (relative_error - 1), set_size, 1 / scaled) return prob @@ -225,7 +241,9 @@ def get_exp_probability_nothing_common( Arguments: n_unique_kmers, ksize, mutation_rate, scaled Returns: float - expected likelihood that nothing is common between sketches """ - n_unique_kmers = handle_seqlen_nkmers(ksize, sequence_len_bp=sequence_len_bp,n_unique_kmers=n_unique_kmers) + n_unique_kmers = handle_seqlen_nkmers( + ksize, sequence_len_bp=sequence_len_bp, n_unique_kmers=n_unique_kmers + ) f_scaled = 1.0 / float(scaled) if mutation_rate == 1.0: return 1.0 @@ -251,12 +269,14 @@ def containment_to_distance( Containment --> distance CI (one step) """ sol1, sol2, point_estimate = None, None, None - n_unique_kmers = handle_seqlen_nkmers(ksize, sequence_len_bp = sequence_len_bp, n_unique_kmers=n_unique_kmers) + n_unique_kmers = handle_seqlen_nkmers( + ksize, sequence_len_bp=sequence_len_bp, n_unique_kmers=n_unique_kmers + ) if containment == 0: - #point_estimate = 1.0 + # point_estimate = 1.0 point_estimate = sol1 = sol2 = 1.0 elif containment == 1: - #point_estimate = 0.0 + # point_estimate = 0.0 point_estimate = sol1 = sol2 = 0.0 else: point_estimate = 1.0 - containment ** (1.0 / ksize) @@ -273,25 +293,33 @@ def containment_to_distance( term_1 = (1.0 - f_scaled) / ( f_scaled * n_unique_kmers**3 * bias_factor**2 ) - term_2 = lambda pest: n_unique_kmers * exp_n_mutated( - n_unique_kmers, ksize, pest - ) - exp_n_mutated_squared(n_unique_kmers, ksize, pest) - term_3 = lambda pest: var_n_mutated(n_unique_kmers, ksize, pest) / ( - n_unique_kmers**2 - ) - var_direct = lambda pest: term_1 * term_2(pest) + term_3(pest) - - f1 = ( - lambda pest: (1 - pest) ** ksize - + z_alpha * np.sqrt(var_direct(pest)) - - containment - ) - f2 = ( - lambda pest: (1 - pest) ** ksize - - z_alpha * np.sqrt(var_direct(pest)) - - containment - ) + def term_2(pest): + return n_unique_kmers * exp_n_mutated( + n_unique_kmers, ksize, pest + ) - exp_n_mutated_squared(n_unique_kmers, ksize, pest) + + def term_3(pest): + return ( + var_n_mutated(n_unique_kmers, ksize, pest) / n_unique_kmers**2 + ) + + def var_direct(pest): + return term_1 * term_2(pest) + term_3(pest) + + def f1(pest): + return ( + (1 - pest) ** ksize + + z_alpha * np.sqrt(var_direct(pest)) + - containment + ) + + def f2(pest): + return ( + (1 - pest) ** ksize + - z_alpha * np.sqrt(var_direct(pest)) + - containment + ) sol1 = brentq(f1, 0.0000001, 0.9999999) sol2 = brentq(f2, 0.0000001, 0.9999999) @@ -308,7 +336,13 @@ def containment_to_distance( prob_nothing_in_common = get_exp_probability_nothing_common( point_estimate, ksize, scaled, 
n_unique_kmers=n_unique_kmers ) - return ciANIResult(point_estimate, prob_nothing_in_common, dist_low=sol2, dist_high=sol1, p_threshold=prob_threshold) + return ciANIResult( + point_estimate, + prob_nothing_in_common, + dist_low=sol2, + dist_high=sol1, + p_threshold=prob_threshold, + ) def jaccard_to_distance( @@ -341,7 +375,9 @@ def jaccard_to_distance( useful for determining whether scaled is sufficient for these comparisons. """ error_lower_bound = None - n_unique_kmers = handle_seqlen_nkmers(ksize, sequence_len_bp=sequence_len_bp, n_unique_kmers=n_unique_kmers) + n_unique_kmers = handle_seqlen_nkmers( + ksize, sequence_len_bp=sequence_len_bp, n_unique_kmers=n_unique_kmers + ) if jaccard == 0: point_estimate = 1.0 error_lower_bound = 0.0 @@ -361,4 +397,10 @@ def jaccard_to_distance( prob_nothing_in_common = get_exp_probability_nothing_common( point_estimate, ksize, scaled, n_unique_kmers=n_unique_kmers ) - return jaccardANIResult(point_estimate, prob_nothing_in_common, jaccard_error=error_lower_bound, p_threshold=prob_threshold, je_threshold=err_threshold) + return jaccardANIResult( + point_estimate, + prob_nothing_in_common, + jaccard_error=error_lower_bound, + p_threshold=prob_threshold, + je_threshold=err_threshold, + ) diff --git a/src/sourmash/exceptions.py b/src/sourmash/exceptions.py index b2f18c12d2..002fbafdfc 100644 --- a/src/sourmash/exceptions.py +++ b/src/sourmash/exceptions.py @@ -1,7 +1,7 @@ from ._lowlevel import lib -__all__ = ['SourmashError'] +__all__ = ["SourmashError"] exceptions_by_code = {} @@ -16,13 +16,15 @@ def __init__(self, msg): def __str__(self): rv = self.message if self.rust_info is not None: - return u'%s\n\n%s' % (rv, self.rust_info) + return f"{rv}\n\n{self.rust_info}" return rv class IndexNotSupported(SourmashError): def __init__(self): - SourmashError.__init__(self, "This index format is not supported in this version of sourmash") + SourmashError.__init__( + self, "This index format is not supported in this version of sourmash" + ) class IndexNotLoaded(SourmashError): @@ -55,7 +57,7 @@ def _get_error_base(error_name): def _make_exceptions(): for attr in dir(lib): - if not attr.startswith('SOURMASH_ERROR_CODE_'): + if not attr.startswith("SOURMASH_ERROR_CODE_"): continue code = getattr(lib, attr) @@ -69,4 +71,5 @@ def _make_exceptions(): else: exceptions_by_code[code] = ValueError + _make_exceptions() diff --git a/src/sourmash/fig.py b/src/sourmash/fig.py index 4454ef64d9..9ca96f1aab 100644 --- a/src/sourmash/fig.py +++ b/src/sourmash/fig.py @@ -3,6 +3,7 @@ Make plots using the distance matrix+labels output by `sourmash compare`. """ from .logging import error, notify + try: import numpy import pylab @@ -10,18 +11,20 @@ except (RuntimeError, ImportError): pass + def load_matrix_and_labels(basefile): """Load the comparison matrix and associated labels. Returns a square numpy matrix & list of labels. """ - D = numpy.load(open(basefile, 'rb')) - labeltext = [x.strip() for x in open(basefile + '.labels.txt')] + D = numpy.load(open(basefile, "rb")) + labeltext = [x.strip() for x in open(basefile + ".labels.txt")] return (D, labeltext) -def plot_composite_matrix(D, labeltext, show_labels=True, - vmax=1.0, vmin=0.0, force=False): +def plot_composite_matrix( + D, labeltext, show_labels=True, vmax=1.0, vmin=0.0, force=False +): """Build a composite plot showing dendrogram + distance matrix/heatmap. Returns a matplotlib figure. @@ -30,25 +33,34 @@ def plot_composite_matrix(D, labeltext, show_labels=True, shown on the plot. 
""" if D.max() > 1.0 or D.min() < 0.0: - error('This matrix doesn\'t look like a distance matrix - min value {}, max value {}', D.min(), D.max()) + error( + "This matrix doesn't look like a distance matrix - min value {}, max value {}", + D.min(), + D.max(), + ) if not force: raise ValueError("not a distance matrix") else: - notify('force is set; scaling to [0, 1]') + notify("force is set; scaling to [0, 1]") D -= D.min() D /= D.max() if show_labels: - show_indices = True + pass fig = pylab.figure(figsize=(11, 8)) ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6]) # plot dendrogram - Y = sch.linkage(D, method='single') # centroid - - Z1 = sch.dendrogram(Y, orientation='left', labels=labeltext, - no_labels=not show_labels, get_leaves=True) + Y = sch.linkage(D, method="single") # centroid + + Z1 = sch.dendrogram( + Y, + orientation="left", + labels=labeltext, + no_labels=not show_labels, + get_leaves=True, + ) ax1.set_xticks([]) xstart = 0.45 @@ -58,8 +70,8 @@ def plot_composite_matrix(D, labeltext, show_labels=True, scale_xstart = xstart + width + 0.01 # re-order labels along rows, top to bottom - idx1 = Z1['leaves'] - reordered_labels = [ labeltext[i] for i in idx1 ] + idx1 = Z1["leaves"] + reordered_labels = [labeltext[i] for i in idx1] # reorder D by the clustering in the dendrogram D = D[idx1, :] @@ -68,8 +80,9 @@ def plot_composite_matrix(D, labeltext, show_labels=True, # show matrix axmatrix = fig.add_axes([xstart, 0.1, width, 0.6]) - im = axmatrix.matshow(D, aspect='auto', origin='lower', - cmap=pylab.cm.YlGnBu, vmin=vmin, vmax=vmax) + im = axmatrix.matshow( + D, aspect="auto", origin="lower", cmap=pylab.cm.YlGnBu, vmin=vmin, vmax=vmax + ) axmatrix.set_xticks([]) axmatrix.set_yticks([]) diff --git a/src/sourmash/hll.py b/src/sourmash/hll.py index c98ded5e8b..8a78049b34 100644 --- a/src/sourmash/hll.py +++ b/src/sourmash/hll.py @@ -32,7 +32,7 @@ def add_sequence(self, sequence, force=False): def add_kmer(self, kmer): "Add a kmer into the sketch." if len(kmer) != self.ksize: - raise ValueError("kmer to add is not {} in length".format(self.ksize)) + raise ValueError(f"kmer to add is not {self.ksize} in length") self.add_sequence(kmer) def add(self, h): diff --git a/src/sourmash/index/__init__.py b/src/sourmash/index/__init__.py index 08068255e5..154f37c126 100644 --- a/src/sourmash/index/__init__.py +++ b/src/sourmash/index/__init__.py @@ -39,18 +39,23 @@ from abc import abstractmethod, ABC from collections import namedtuple, Counter -from sourmash.search import (make_jaccard_search_query, - make_containment_query, - calc_threshold_from_bp) +from sourmash.search import ( + make_jaccard_search_query, + make_containment_query, + calc_threshold_from_bp, +) from sourmash.manifest import CollectionManifest from sourmash.logging import debug_literal from sourmash.signature import load_signatures, save_signatures -from sourmash.minhash import (flatten_and_downsample_scaled, - flatten_and_downsample_num, - flatten_and_intersect_scaled) +from sourmash.minhash import ( + flatten_and_downsample_scaled, + flatten_and_downsample_num, + flatten_and_intersect_scaled, +) # generic return tuple for Index.search and Index.gather -IndexSearchResult = namedtuple('Result', 'score, signature, location') +IndexSearchResult = namedtuple("Result", "score, signature, location") + class Index(ABC): # this will be removed soon; see sourmash#1894. 
@@ -103,8 +108,7 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False): @classmethod @abstractmethod - def load(cls, location, leaf_loader=None, storage=None, - print_version_warning=True): + def load(cls, location, leaf_loader=None, storage=None, print_version_warning=True): """ """ def find(self, search_fn, query, **kwargs): @@ -133,7 +137,7 @@ def prepare_subject(subj_mh): def prepare_query(query_mh, subj_mh): return flatten_and_downsample_scaled(query_mh, subj_mh.scaled) - else: # num + else: # num query_num = query_mh.num def prepare_subject(subj_mh): @@ -156,10 +160,7 @@ def prepare_query(query_mh, subj_mh): query_size = len(query_mh) subj_size = len(subj_mh) - score = search_fn.score_fn(query_size, - shared_size, - subj_size, - total_size) + score = search_fn.score_fn(query_size, shared_size, subj_size, total_size) if search_fn.passes(score): # note: here we yield the original signature, not the @@ -173,7 +174,9 @@ def search_abund(self, query, *, threshold=None, **kwargs): Results will be sorted by similarity, highest to lowest. """ if not query.minhash.track_abundance: - raise TypeError("'search_abund' requires query signature with abundance information") + raise TypeError( + "'search_abund' requires query signature with abundance information" + ) # check arguments if threshold is None: @@ -184,7 +187,9 @@ def search_abund(self, query, *, threshold=None, **kwargs): matches = [] for subj, loc in self.signatures_with_location(): if not subj.minhash.track_abundance: - raise TypeError("'search_abund' requires subject signatures with abundance information") + raise TypeError( + "'search_abund' requires subject signatures with abundance information" + ) score = query.similarity(subj, downsample=True) if score >= threshold: matches.append(IndexSearchResult(score, subj, loc)) @@ -193,9 +198,16 @@ def search_abund(self, query, *, threshold=None, **kwargs): matches.sort(key=lambda x: -x.score) return matches - def search(self, query, *, threshold=None, - do_containment=False, do_max_containment=False, - best_only=False, **kwargs): + def search( + self, + query, + *, + threshold=None, + do_containment=False, + do_max_containment=False, + best_only=False, + **kwargs, + ): """Return list of IndexSearchResult with similarity above 'threshold'. Results will be sorted by similarity, highest to lowest. @@ -211,10 +223,12 @@ def search(self, query, *, threshold=None, raise TypeError("'search' requires 'threshold'") threshold = float(threshold) - search_obj = make_jaccard_search_query(do_containment=do_containment, - do_max_containment=do_max_containment, - best_only=best_only, - threshold=threshold) + search_obj = make_jaccard_search_query( + do_containment=do_containment, + do_max_containment=do_max_containment, + best_only=best_only, + threshold=threshold, + ) # do the actual search: matches = list(self.find(search_obj, query, **kwargs)) @@ -228,17 +242,17 @@ def prefetch(self, query, threshold_bp, **kwargs): Generator. Returns 0 or more IndexSearchResult namedtuples. """ - if not self: # empty database? quit. + if not self: # empty database? quit. 
raise ValueError("no signatures to search") # default best_only to False - best_only = kwargs.get('best_only', False) + best_only = kwargs.get("best_only", False) - search_fn = make_containment_query(query.minhash, threshold_bp, - best_only=best_only) + search_fn = make_containment_query( + query.minhash, threshold_bp, best_only=best_only + ) - for sr in self.find(search_fn, query, **kwargs): - yield sr + yield from self.find(search_fn, query, **kwargs) def best_containment(self, query, threshold_bp=None, **kwargs): """Return the match with the best Jaccard containment in the Index. @@ -247,8 +261,7 @@ def best_containment(self, query, threshold_bp=None, **kwargs): """ results = self.prefetch(query, threshold_bp, best_only=True, **kwargs) - results = sorted(results, - key=lambda x: (-x.score, x.signature.md5sum())) + results = sorted(results, key=lambda x: (-x.score, x.signature.md5sum())) try: return next(iter(results)) @@ -277,8 +290,7 @@ def peek(self, query_mh, *, threshold_bp=0): return [] # if matches, calculate intersection & return. - intersect_mh = flatten_and_intersect_scaled(result.signature.minhash, - query_mh) + intersect_mh = flatten_and_intersect_scaled(result.signature.minhash, query_mh) return [result, intersect_mh] @@ -307,8 +319,15 @@ def counter_gather(self, query, threshold_bp, **kwargs): return counter @abstractmethod - def select(self, ksize=None, moltype=None, scaled=None, num=None, - abund=None, containment=None): + def select( + self, + ksize=None, + moltype=None, + scaled=None, + num=None, + abund=None, + containment=None, + ): """Return Index containing only signatures that match requirements. Current arguments can be any or all of: @@ -326,8 +345,17 @@ def select(self, ksize=None, moltype=None, scaled=None, num=None, """ -def select_signature(ss, *, ksize=None, moltype=None, scaled=0, num=0, - containment=False, abund=None, picklist=None): +def select_signature( + ss, + *, + ksize=None, + moltype=None, + scaled=0, + num=0, + containment=False, + abund=None, + picklist=None, +): "Check that the given signature matches the specified requirements." # ksize match? if ksize and ksize != ss.minhash.ksize: @@ -372,6 +400,7 @@ class LinearIndex(Index): Concrete class; signatures held in memory; does not use manifests. """ + def __init__(self, _signatures=None, filename=None): self._signatures = [] if _signatures: @@ -395,7 +424,7 @@ def insert(self, node): self._signatures.append(node) def save(self, path): - with open(path, 'wt') as fp: + with open(path, "w") as fp: save_signatures(self.signatures(), fp) @classmethod @@ -404,7 +433,7 @@ def load(cls, location, filename=None): si = load_signatures(location, do_raise=True) if filename is None: - filename=location + filename = location lidx = LinearIndex(si, filename=filename) return lidx @@ -449,14 +478,12 @@ def __init__(self, db, selection_dict={}): def signatures(self): "Return the selected signatures." db = self.db.select(**self.selection_dict) - for ss in db.signatures(): - yield ss + yield from db.signatures() def signatures_with_location(self): "Return the selected signatures, with a location." db = self.db.select(**self.selection_dict) - for tup in db.signatures_with_location(): - yield tup + yield from db.signatures_with_location() def __bool__(self): try: @@ -502,10 +529,18 @@ class ZipFileLinearIndex(Index): Concrete class; signatures dynamically loaded from disk; uses manifests. 
""" + is_database = True - def __init__(self, storage, *, selection_dict=None, - traverse_yield_all=False, manifest=None, use_manifest=True): + def __init__( + self, + storage, + *, + selection_dict=None, + traverse_yield_all=False, + manifest=None, + use_manifest=True, + ): self.storage = storage self.selection_dict = selection_dict self.traverse_yield_all = traverse_yield_all @@ -514,7 +549,7 @@ def __init__(self, storage, *, selection_dict=None, # do we have a manifest already? if not, try loading. if use_manifest: if manifest is not None: - debug_literal('ZipFileLinearIndex using passed-in manifest') + debug_literal("ZipFileLinearIndex using passed-in manifest") self.manifest = manifest else: self._load_manifest() @@ -529,15 +564,16 @@ def __init__(self, storage, *, selection_dict=None, def _load_manifest(self): "Load a manifest if one exists" try: - manifest_data = self.storage.load('SOURMASH-MANIFEST.csv') + manifest_data = self.storage.load("SOURMASH-MANIFEST.csv") except (KeyError, FileNotFoundError): self.manifest = None else: - debug_literal(f'found manifest on load for {self.storage.path}') + debug_literal(f"found manifest on load for {self.storage.path}") # load manifest! from io import StringIO - manifest_data = manifest_data.decode('utf-8') + + manifest_data = manifest_data.decode("utf-8") manifest_fp = StringIO(manifest_data) self.manifest = CollectionManifest.load_from_csv(manifest_fp) @@ -584,8 +620,9 @@ def load(cls, location, traverse_yield_all=False, use_manifest=True): raise FileNotFoundError(location) storage = ZipStorage(location) - return cls(storage, traverse_yield_all=traverse_yield_all, - use_manifest=use_manifest) + return cls( + storage, traverse_yield_all=traverse_yield_all, use_manifest=use_manifest + ) def _signatures_with_internal(self): """Return an iterator of tuples (ss, internal_location). @@ -596,9 +633,11 @@ def _signatures_with_internal(self): # 'Storage' does not provide a way to list all the files, so :shrug:. for filename in self.storage._filenames(): # should we load this file? if it ends in .sig OR we are forcing: - if filename.endswith('.sig') or \ - filename.endswith('.sig.gz') or \ - self.traverse_yield_all: + if ( + filename.endswith(".sig") + or filename.endswith(".sig.gz") + or self.traverse_yield_all + ): sig_data = self.storage.load(filename) for ss in load_signatures(sig_data): yield ss, filename @@ -628,14 +667,19 @@ def signatures(self): # ad-hoc zipfiles that have no manifests.) for filename in storage._filenames(): # should we load this file? if it ends in .sig OR force: - if filename.endswith('.sig') or \ - filename.endswith('.sig.gz') or \ - self.traverse_yield_all: + if ( + filename.endswith(".sig") + or filename.endswith(".sig.gz") + or self.traverse_yield_all + ): if selection_dict: - select = lambda x: select_signature(x, - **selection_dict) + + def select(x): + return select_signature(x, **selection_dict) else: - select = lambda x: True + + def select(x): + return True data = self.storage.load(filename) for ss in load_signatures(data): @@ -651,11 +695,13 @@ def select(self, **kwargs): if manifest is not None: manifest = manifest.select_to_manifest(**kwargs) - return ZipFileLinearIndex(self.storage, - selection_dict=None, - traverse_yield_all=traverse_yield_all, - manifest=manifest, - use_manifest=True) + return ZipFileLinearIndex( + self.storage, + selection_dict=None, + traverse_yield_all=traverse_yield_all, + manifest=manifest, + use_manifest=True, + ) else: # no manifest? 
just pass along all the selection kwargs to # the new ZipFileLinearIndex. @@ -671,11 +717,13 @@ def select(self, **kwargs): d[k] = v kwargs = d - return ZipFileLinearIndex(self.storage, - selection_dict=kwargs, - traverse_yield_all=traverse_yield_all, - manifest=None, - use_manifest=False) + return ZipFileLinearIndex( + self.storage, + selection_dict=kwargs, + traverse_yield_all=traverse_yield_all, + manifest=None, + use_manifest=False, + ) class CounterGather: @@ -699,11 +747,12 @@ class CounterGather: duplicate md5s are collapsed inside the class, because we use the md5sum as a key into the dictionary used to store matches. """ + def __init__(self, query): "Constructor - takes a query SourmashSignature." query_mh = query.minhash if not query_mh.scaled: - raise ValueError('gather requires scaled signatures') + raise ValueError("gather requires scaled signatures") # track query self.orig_query_mh = query_mh.copy().flatten() @@ -746,8 +795,7 @@ def downsample(self, scaled): def signatures(self): "Return all signatures." - for ss in self.siglist.values(): - yield ss + yield from self.siglist.values() @property def union_found(self): @@ -763,8 +811,7 @@ def union_found(self): # for each match, intersect match with query & then add to found_mh. for ss in self.siglist.values(): - intersect_mh = flatten_and_intersect_scaled(ss.minhash, - orig_query_mh) + intersect_mh = flatten_and_intersect_scaled(ss.minhash, orig_query_mh) found_mh.add_many(intersect_mh) return found_mh @@ -784,7 +831,7 @@ def peek(self, cur_query_mh, *, threshold_bp=0): scaled = self.downsample(cur_query_mh.scaled) cur_query_mh = cur_query_mh.downsample(scaled=scaled) - if not cur_query_mh: # empty query? quit. + if not cur_query_mh: # empty query? quit. return [] # CTB: could probably remove this check unless debug requested. @@ -841,7 +888,7 @@ def consume(self, intersect_mh): # Prepare counter for finding the next match by decrementing # all hashes found in the current match in other datasets; # remove empty datasets from counter, too. - for (dataset_id, _) in most_common: + for dataset_id, _ in most_common: # CTB: note, remaining_mh may not be at correct scaled here. # this means that counters that _should_ be empty might not # _be_ empty in some situations. This does not @@ -849,8 +896,7 @@ def consume(self, intersect_mh): # 'counter' objects. The tradeoffs to fixing this would # need to be examined! (This could be fixed in self.downsample().) remaining_mh = siglist[dataset_id].minhash - intersect_count = intersect_mh.count_common(remaining_mh, - downsample=True) + intersect_count = intersect_mh.count_common(remaining_mh, downsample=True) if intersect_count: counter[dataset_id] -= intersect_count if counter[dataset_id] == 0: @@ -881,6 +927,7 @@ class MultiIndex(Index): Concrete class; signatures held in memory; builds and uses manifests. """ + def __init__(self, manifest, parent, *, prepend_location=False): """Constructor; takes manifest containing signatures, together with the top-level location. @@ -898,16 +945,16 @@ def location(self): def signatures(self): for row in self.manifest.rows: - yield row['signature'] + yield row["signature"] def signatures_with_location(self): for row in self.manifest.rows: - loc = row['internal_location'] + loc = row["internal_location"] # here, 'parent' may have been removed from internal_location # for directories; if so, add it back in. 
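        # illustrative, with hypothetical values: parent="/data/sigs" and
        # internal_location="subdir/abc.sig" would yield the location
        # "/data/sigs/subdir/abc.sig" after the join below.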
if self.prepend_location: loc = os.path.join(self.parent, loc) - yield row['signature'], loc + yield row["signature"], loc def _signatures_with_internal(self): """Return an iterator of tuples (ss, location) @@ -916,8 +963,7 @@ def _signatures_with_internal(self): index. This is a special feature of this (in memory) class. """ for row in self.manifest.rows: - yield row['signature'], row['internal_location'] - + yield row["signature"], row["internal_location"] def __len__(self): if self.manifest is None: @@ -986,18 +1032,17 @@ def load_from_directory(cls, pathname, *, force=False): rel = os.path.relpath(thisfile, pathname) source_list.append(rel) - except (IOError, sourmash.exceptions.SourmashError) as exc: + except (OSError, sourmash.exceptions.SourmashError) as exc: if force: - continue # ignore error + continue # ignore error else: - raise ValueError(exc) # stop loading! + raise ValueError(exc) # stop loading! # did we load anything? if not, error if not index_list: raise ValueError(f"no signatures to load under directory '{pathname}'") - return cls.load(index_list, source_list, pathname, - prepend_location=True) + return cls.load(index_list, source_list, pathname, prepend_location=True) @classmethod def load_from_path(cls, pathname, force=False): @@ -1010,7 +1055,7 @@ def load_from_path(cls, pathname, force=False): if not os.path.exists(pathname): raise ValueError(f"'{pathname}' must exist.") - if os.path.isdir(pathname): # traverse + if os.path.isdir(pathname): # traverse return cls.load_from_directory(pathname, force=force) # load as a .sig/JSON file @@ -1020,7 +1065,7 @@ def load_from_path(cls, pathname, force=False): idx = LinearIndex.load(pathname) index_list = [idx] source_list = [pathname] - except (IOError, sourmash.exceptions.SourmashError): + except (OSError, sourmash.exceptions.SourmashError): if not force: raise ValueError(f"no signatures to load from '{pathname}'") return None @@ -1035,8 +1080,8 @@ def load_from_pathlist(cls, filename): including zip collections, etc; it uses 'load_file_as_index' underneath. """ - from ..sourmash_args import (load_pathlist_from_file, - load_file_as_index) + from ..sourmash_args import load_pathlist_from_file, load_file_as_index + idx_list = [] src_list = [] @@ -1056,8 +1101,9 @@ def save(self, *args): def select(self, **kwargs): "Run 'select' on the manifest." new_manifest = self.manifest.select_to_manifest(**kwargs) - return MultiIndex(new_manifest, self.parent, - prepend_location=self.prepend_location) + return MultiIndex( + new_manifest, self.parent, prepend_location=self.prepend_location + ) class StandaloneManifestIndex(Index): @@ -1085,6 +1131,7 @@ class StandaloneManifestIndex(Index): objects. However, this class does not store any signatures in memory, unlike MultiIndex. """ + is_database = True def __init__(self, manifest, location, *, prefix=None): @@ -1119,8 +1166,7 @@ def location(self): def signatures_with_location(self): "Return an iterator over all signatures and their locations." - for ss, loc in self._signatures_with_internal(): - yield ss, loc + yield from self._signatures_with_internal() def signatures(self): "Return an iterator over all signatures." @@ -1140,7 +1186,7 @@ def _signatures_with_internal(self): picklist = self.manifest.to_picklist() for iloc in self.manifest.locations(): # prepend location with prefix? 
- if not iloc.startswith('/') and self.prefix: + if not iloc.startswith("/") and self.prefix: iloc = os.path.join(self.prefix, iloc) idx = sourmash.load_file_as_index(iloc) @@ -1165,5 +1211,4 @@ def insert(self, *args): def select(self, **kwargs): "Run 'select' on the manifest." new_manifest = self.manifest.select_to_manifest(**kwargs) - return StandaloneManifestIndex(new_manifest, self._location, - prefix=self.prefix) + return StandaloneManifestIndex(new_manifest, self._location, prefix=self.prefix) diff --git a/src/sourmash/index/revindex.py b/src/sourmash/index/revindex.py index 2f7074b53f..01f808783d 100644 --- a/src/sourmash/index/revindex.py +++ b/src/sourmash/index/revindex.py @@ -123,9 +123,9 @@ def signatures(self): for sig in sigs: yield sig - #if self._signatures: + # if self._signatures: # yield from self._signatures - #else: + # else: # raise NotImplementedError("Call into Rust and retrieve sigs") def __len__(self): @@ -156,81 +156,81 @@ def select(self, ksize=None, moltype=None, **kwargs): # TODO: deal with None/default values self.template = MinHash(ksize=ksize, moltype=moltype) -# def search(self, query, *args, **kwargs): -# """Return set of matches with similarity above 'threshold'. -# -# Results will be sorted by similarity, highest to lowest. -# -# Optional arguments: -# * do_containment: default False. If True, use Jaccard containment. -# * ignore_abundance: default False. If True, and query signature -# and database support k-mer abundances, ignore those abundances. -# -# Note, the "best only" hint is ignored by LCA_Database -# """ -# if not query.minhash: -# return [] -# -# # check arguments -# if "threshold" not in kwargs: -# raise TypeError("'search' requires 'threshold'") -# threshold = kwargs["threshold"] -# do_containment = kwargs.get("do_containment", False) -# ignore_abundance = kwargs.get("ignore_abundance", False) -# -# self._init_inner() -# -# size = ffi.new("uintptr_t *") -# results_ptr = self._methodcall( -# lib.revindex_search, -# query._get_objptr(), -# threshold, -# do_containment, -# ignore_abundance, -# size, -# ) -# -# size = size[0] -# if size == 0: -# return [] -# -# results = [] -# for i in range(size): -# match = SearchResult._from_objptr(results_ptr[i]) -# if match.score >= threshold: -# results.append(IndexSearchResult(match.score, match.signature, match.filename)) -# -# return results -# -# def gather(self, query, *args, **kwargs): -# "Return the match with the best Jaccard containment in the database." -# if not query.minhash: -# return [] -# -# self._init_inner() -# -# threshold_bp = kwargs.get("threshold_bp", 0.0) -# threshold = threshold_bp / (len(query.minhash) * self.scaled) -# -# results = [] -# size = ffi.new("uintptr_t *") -# results_ptr = self._methodcall( -# lib.revindex_gather, query._get_objptr(), threshold, True, True, size -# ) -# size = size[0] -# if size == 0: -# return [] -# -# results = [] -# for i in range(size): -# match = SearchResult._from_objptr(results_ptr[i]) -# if match.score >= threshold: -# results.append(IndexSearchResult(match.score, match.signature, match.filename)) -# -# results.sort(reverse=True, -# key=lambda x: (x.score, x.signature.md5sum())) -# -# return results[:1] + # def search(self, query, *args, **kwargs): + # """Return set of matches with similarity above 'threshold'. + # + # Results will be sorted by similarity, highest to lowest. + # + # Optional arguments: + # * do_containment: default False. If True, use Jaccard containment. + # * ignore_abundance: default False. 
If True, and query signature + # and database support k-mer abundances, ignore those abundances. + # + # Note, the "best only" hint is ignored by LCA_Database + # """ + # if not query.minhash: + # return [] + # + # # check arguments + # if "threshold" not in kwargs: + # raise TypeError("'search' requires 'threshold'") + # threshold = kwargs["threshold"] + # do_containment = kwargs.get("do_containment", False) + # ignore_abundance = kwargs.get("ignore_abundance", False) + # + # self._init_inner() + # + # size = ffi.new("uintptr_t *") + # results_ptr = self._methodcall( + # lib.revindex_search, + # query._get_objptr(), + # threshold, + # do_containment, + # ignore_abundance, + # size, + # ) + # + # size = size[0] + # if size == 0: + # return [] + # + # results = [] + # for i in range(size): + # match = SearchResult._from_objptr(results_ptr[i]) + # if match.score >= threshold: + # results.append(IndexSearchResult(match.score, match.signature, match.filename)) + # + # return results + # + # def gather(self, query, *args, **kwargs): + # "Return the match with the best Jaccard containment in the database." + # if not query.minhash: + # return [] + # + # self._init_inner() + # + # threshold_bp = kwargs.get("threshold_bp", 0.0) + # threshold = threshold_bp / (len(query.minhash) * self.scaled) + # + # results = [] + # size = ffi.new("uintptr_t *") + # results_ptr = self._methodcall( + # lib.revindex_gather, query._get_objptr(), threshold, True, True, size + # ) + # size = size[0] + # if size == 0: + # return [] + # + # results = [] + # for i in range(size): + # match = SearchResult._from_objptr(results_ptr[i]) + # if match.score >= threshold: + # results.append(IndexSearchResult(match.score, match.signature, match.filename)) + # + # results.sort(reverse=True, + # key=lambda x: (x.score, x.signature.md5sum())) + # + # return results[:1] @property def scaled(self): diff --git a/src/sourmash/index/sqlite_index.py b/src/sourmash/index/sqlite_index.py index b16eb00b59..458d40919d 100644 --- a/src/sourmash/index/sqlite_index.py +++ b/src/sourmash/index/sqlite_index.py @@ -95,9 +95,15 @@ # converters for unsigned 64-bit ints: if over MAX_SQLITE_INT, # convert to signed int. 
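# A quick round-trip check of the converters defined just below
# (illustrative): SQLite's INTEGER column is signed 64-bit, so hash
# values above 2**63 - 1 are stored via a two's-complement
# reinterpretation and recovered unchanged on the way out:
#
#   h = 2**64 - 1                               # largest possible hashval
#   assert convert_hash_to(h) == -1             # reinterpreted as signed
#   assert convert_hash_from(convert_hash_to(h)) == h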
-MAX_SQLITE_INT = 2 ** 63 - 1 -convert_hash_to = lambda x: BitArray(uint=x, length=64).int if x > MAX_SQLITE_INT else x -convert_hash_from = lambda x: BitArray(int=x, length=64).uint if x < 0 else x +MAX_SQLITE_INT = 2**63 - 1 + + +def convert_hash_to(x): + return BitArray(uint=x, length=64).int if x > MAX_SQLITE_INT else x + + +def convert_hash_from(x): + return BitArray(int=x, length=64).uint if x < 0 else x def load_sqlite_index(filename, *, request_manifest=False): @@ -126,27 +132,29 @@ def load_sqlite_index(filename, *, request_manifest=False): is_manifest = False is_lca_db = False - if 'SqliteIndex' in internal_d: - v = internal_d['SqliteIndex'] - if v != '1.0': + if "SqliteIndex" in internal_d: + v = internal_d["SqliteIndex"] + if v != "1.0": raise IndexNotSupported is_index = True debug_literal("load_sqlite_index: it's an index!") - if is_index and 'SqliteLineage' in internal_d: - v = internal_d['SqliteLineage'] - if v != '1.0': + if is_index and "SqliteLineage" in internal_d: + v = internal_d["SqliteLineage"] + if v != "1.0": raise IndexNotSupported is_lca_db = True debug_literal("load_sqlite_index: it's got a lineage table!") - if 'SqliteManifest' in internal_d: - v = internal_d['SqliteManifest'] - if v != '1.0': + if "SqliteManifest" in internal_d: + v = internal_d["SqliteManifest"] + if v != "1.0": raise IndexNotSupported is_manifest = True - debug_literal(f"load_sqlite_index: it's a manifest! request_manifest: {request_manifest}") + debug_literal( + f"load_sqlite_index: it's a manifest! request_manifest: {request_manifest}" + ) # every Index is a Manifest! if is_index or is_lca_db: @@ -163,10 +171,10 @@ def load_sqlite_index(filename, *, request_manifest=False): debug_literal("load_sqlite_index: returning SqliteIndex") idx = SqliteIndex(filename) elif is_manifest: - managed_by_index=False + managed_by_index = False if is_index: assert request_manifest - managed_by_index=True + managed_by_index = True prefix = os.path.dirname(filename) mf = SqliteCollectionManifest(conn, managed_by_index=managed_by_index) @@ -178,7 +186,7 @@ def load_sqlite_index(filename, *, request_manifest=False): class SqliteIndex(Index): is_database = True - + # NOTE: we do not need _signatures_with_internal for this class # because it supplies a manifest directly :tada:. @@ -192,8 +200,7 @@ def __init__(self, dbfile, *, sqlite_manifest=None, conn=None): # build me a SQLite manifest class to use for selection. 
if sqlite_manifest is None: - sqlite_manifest = SqliteCollectionManifest(conn, - managed_by_index=True) + sqlite_manifest = SqliteCollectionManifest(conn, managed_by_index=True) self.manifest = sqlite_manifest self.conn = conn @@ -202,7 +209,9 @@ def __init__(self, dbfile, *, sqlite_manifest=None, conn=None): c.execute("SELECT DISTINCT scaled FROM sourmash_sketches") scaled_vals = c.fetchall() if len(scaled_vals) > 1: - raise ValueError("this database has multiple scaled values, which is not currently allowed") + raise ValueError( + "this database has multiple scaled values, which is not currently allowed" + ) if scaled_vals: self.scaled = scaled_vals[0][0] @@ -247,28 +256,35 @@ def create(cls, dbfile, *, append=False): def _create_tables(cls, c, *, ignore_exists=False): "Create sqlite tables for SqliteIndex" try: - sqlite_utils.add_sourmash_internal(c, 'SqliteIndex', '1.0') + sqlite_utils.add_sourmash_internal(c, "SqliteIndex", "1.0") SqliteCollectionManifest._create_tables(c) - c.execute(""" + c.execute( + """ CREATE TABLE IF NOT EXISTS sourmash_hashes ( hashval INTEGER NOT NULL, sketch_id INTEGER NOT NULL, FOREIGN KEY (sketch_id) REFERENCES sourmash_sketches (id) ) - """) - c.execute(""" + """ + ) + c.execute( + """ CREATE INDEX IF NOT EXISTS sourmash_hashval_idx ON sourmash_hashes ( hashval, sketch_id ) - """) - c.execute(""" + """ + ) + c.execute( + """ CREATE INDEX IF NOT EXISTS sourmash_hashval_idx2 ON sourmash_hashes ( hashval ) - """) - c.execute(""" + """ + ) + c.execute( + """ CREATE INDEX IF NOT EXISTS sourmash_sketch_idx ON sourmash_hashes ( sketch_id ) @@ -312,18 +328,21 @@ def insert(self, ss, *, cursor=None, commit=True): raise ValueError("cannot store signatures with abundance in SqliteIndex") if self.scaled is not None and self.scaled != ss.minhash.scaled: - raise ValueError(f"this database can only store scaled values={self.scaled}") + raise ValueError( + f"this database can only store scaled values={self.scaled}" + ) elif self.scaled is None: self.scaled = ss.minhash.scaled # ok, first create and insert a manifest row - row = BaseCollectionManifest.make_manifest_row(ss, None, - include_signature=False) + row = BaseCollectionManifest.make_manifest_row( + ss, None, include_signature=False + ) self.manifest._insert_row(c, row, call_is_from_index=True) # retrieve ID of row for retrieving hashes: c.execute("SELECT last_insert_rowid()") - sketch_id, = c.fetchone() + (sketch_id,) = c.fetchone() # insert all the hashes hashes_to_sketch = [] @@ -331,8 +350,10 @@ def insert(self, ss, *, cursor=None, commit=True): hh = convert_hash_to(h) hashes_to_sketch.append((hh, sketch_id)) - c.executemany("INSERT INTO sourmash_hashes (hashval, sketch_id) VALUES (?, ?)", - hashes_to_sketch) + c.executemany( + "INSERT INTO sourmash_hashes (hashval, sketch_id) VALUES (?, ?)", + hashes_to_sketch, + ) if commit: self.conn.commit() @@ -366,30 +387,31 @@ def find(self, search_fn, query, **kwargs): picklist = None if self.manifest.selection_dict: - picklist = self.manifest.selection_dict.get('picklist') + picklist = self.manifest.selection_dict.get("picklist") c1 = self.conn.cursor() c2 = self.conn.cursor() - debug_literal('running _get_matching_sketches...') + debug_literal("running _get_matching_sketches...") t0 = time.time() - xx = self._get_matching_sketches(c1, query_mh.hashes, - query_mh._max_hash) + xx = self._get_matching_sketches(c1, query_mh.hashes, query_mh._max_hash) for sketch_id, n_matching_hashes in xx: - debug_literal(f"...got sketch {sketch_id}, with {n_matching_hashes} matching 
hashes in {time.time() - t0:.2f}") + debug_literal( + f"...got sketch {sketch_id}, with {n_matching_hashes} matching hashes in {time.time() - t0:.2f}" + ) # # first, estimate sketch size using sql results. # query_size = len(query_mh) - subj_size = self._load_sketch_size(c2, sketch_id, - query_mh._max_hash) + subj_size = self._load_sketch_size(c2, sketch_id, query_mh._max_hash) total_size = query_size + subj_size - n_matching_hashes shared_size = n_matching_hashes - score = search_fn.score_fn(query_size, shared_size, subj_size, - total_size) + score = search_fn.score_fn(query_size, shared_size, subj_size, total_size) - debug_literal(f"APPROX RESULT: score={score} qsize={query_size}, ssize={subj_size} total={total_size} overlap={shared_size}") + debug_literal( + f"APPROX RESULT: score={score} qsize={query_size}, ssize={subj_size} total={total_size} overlap={shared_size}" + ) # do we pass? if not search_fn.passes(score): @@ -415,8 +437,7 @@ def _select(self, *, num=0, track_abundance=False, **kwargs): # create manifest if needed manifest = self.manifest if manifest is None: - manifest = SqliteCollectionManifest(self.conn, - managed_by_index=True) + manifest = SqliteCollectionManifest(self.conn, managed_by_index=True) # modify manifest manifest = manifest.select_to_manifest(**kwargs) @@ -427,9 +448,7 @@ def select(self, *args, **kwargs): sqlite_manifest = self._select(*args, **kwargs) # return a new SqliteIndex with a new manifest, but same old conn. - return SqliteIndex(self.dbfile, - sqlite_manifest=sqlite_manifest, - conn=self.conn) + return SqliteIndex(self.dbfile, sqlite_manifest=sqlite_manifest, conn=self.conn) # # Actual SQL queries, etc. @@ -438,53 +457,77 @@ def select(self, *args, **kwargs): def _load_sketch_size(self, c1, sketch_id, max_hash): "Get sketch size for given sketch, downsampled by max_hash." if max_hash <= MAX_SQLITE_INT: - c1.execute(""" + c1.execute( + """ SELECT COUNT(hashval) FROM sourmash_hashes WHERE sketch_id=? AND hashval >= 0 AND hashval <= ?""", - (sketch_id, max_hash)) + (sketch_id, max_hash), + ) else: - c1.execute('SELECT COUNT(hashval) FROM sourmash_hashes WHERE sketch_id=?', - (sketch_id,)) + c1.execute( + "SELECT COUNT(hashval) FROM sourmash_hashes WHERE sketch_id=?", + (sketch_id,), + ) - n_hashes, = c1.fetchone() + (n_hashes,) = c1.fetchone() return n_hashes def _load_sketch(self, c, sketch_id, *, match_scaled=None): "Load an individual sketch. If match_scaled is set, downsample." 
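        # Illustrative summary: the "hashval >= 0 AND hashval <= ?"
        # constraint assembled below is what implements scaled
        # downsampling here — a scaled sketch keeps exactly the hashes in
        # [0, _max_hash], where _max_hash is roughly 2**64 / scaled, so a
        # larger `scaled` retains proportionally fewer hashes.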
start = time.time() - c.execute(""" + c.execute( + """ SELECT id, name, scaled, ksize, filename, moltype, seed - FROM sourmash_sketches WHERE id=?""", (sketch_id,)) - debug_literal(f"load sketch {sketch_id}: got sketch info in {time.time() - start:.2f}") + FROM sourmash_sketches WHERE id=?""", + (sketch_id,), + ) + debug_literal( + f"load sketch {sketch_id}: got sketch info in {time.time() - start:.2f}" + ) sketch_id, name, scaled, ksize, filename, moltype, seed = c.fetchone() if match_scaled is not None: scaled = max(scaled, match_scaled) - is_protein = 1 if moltype=='protein' else 0 - is_dayhoff = 1 if moltype=='dayhoff' else 0 - is_hp = 1 if moltype=='hp' else 0 - - mh = MinHash(n=0, ksize=ksize, scaled=scaled, seed=seed, - is_protein=is_protein, dayhoff=is_dayhoff, hp=is_hp) - + is_protein = 1 if moltype == "protein" else 0 + is_dayhoff = 1 if moltype == "dayhoff" else 0 + is_hp = 1 if moltype == "hp" else 0 + + mh = MinHash( + n=0, + ksize=ksize, + scaled=scaled, + seed=seed, + is_protein=is_protein, + dayhoff=is_dayhoff, + hp=is_hp, + ) template_values = [sketch_id] hash_constraint_str = "" max_hash = mh._max_hash if max_hash <= MAX_SQLITE_INT: - hash_constraint_str = "sourmash_hashes.hashval >= 0 AND sourmash_hashes.hashval <= ? AND" + hash_constraint_str = ( + "sourmash_hashes.hashval >= 0 AND sourmash_hashes.hashval <= ? AND" + ) template_values.insert(0, max_hash) else: - debug_literal('NOT EMPLOYING hash_constraint_str') + debug_literal("NOT EMPLOYING hash_constraint_str") - debug_literal(f"finding hashes for sketch {sketch_id} in {time.time() - start:.2f}") - c.execute(f"SELECT hashval FROM sourmash_hashes WHERE {hash_constraint_str} sourmash_hashes.sketch_id=?", template_values) + debug_literal( + f"finding hashes for sketch {sketch_id} in {time.time() - start:.2f}" + ) + c.execute( + f"SELECT hashval FROM sourmash_hashes WHERE {hash_constraint_str} sourmash_hashes.sketch_id=?", + template_values, + ) - debug_literal(f"loading hashes for sketch {sketch_id} in {time.time() - start:.2f}") - for hashval, in c: + debug_literal( + f"loading hashes for sketch {sketch_id} in {time.time() - start:.2f}" + ) + for (hashval,) in c: hh = convert_hash_from(hashval) mh.add_hash(hh) @@ -495,29 +538,36 @@ def _load_sketch(self, c, sketch_id, *, match_scaled=None): def _load_sketches(self, c): "Load sketches based on manifest _id column." 
for row in self.manifest.rows: - sketch_id = row['_id'] - assert row['num'] == 0 - - moltype = row['moltype'] - is_protein = 1 if moltype=='protein' else 0 - is_dayhoff = 1 if moltype=='dayhoff' else 0 - is_hp = 1 if moltype=='hp' else 0 - - ksize = row['ksize'] - scaled = row['scaled'] - seed = row['seed'] - - mh = MinHash(n=0, ksize=ksize, scaled=scaled, seed=seed, - is_protein=is_protein, dayhoff=is_dayhoff, hp=is_hp) + sketch_id = row["_id"] + assert row["num"] == 0 + + moltype = row["moltype"] + is_protein = 1 if moltype == "protein" else 0 + is_dayhoff = 1 if moltype == "dayhoff" else 0 + is_hp = 1 if moltype == "hp" else 0 + + ksize = row["ksize"] + scaled = row["scaled"] + seed = row["seed"] + + mh = MinHash( + n=0, + ksize=ksize, + scaled=scaled, + seed=seed, + is_protein=is_protein, + dayhoff=is_dayhoff, + hp=is_hp, + ) - c.execute("SELECT hashval FROM sourmash_hashes WHERE sketch_id=?", - (sketch_id,)) + c.execute( + "SELECT hashval FROM sourmash_hashes WHERE sketch_id=?", (sketch_id,) + ) - for hashval, in c: + for (hashval,) in c: mh.add_hash(convert_hash_from(hashval)) - ss = SourmashSignature(mh, name=row['name'], - filename=row['filename']) + ss = SourmashSignature(mh, name=row["name"], filename=row["filename"]) yield ss, self.dbfile, sketch_id def _get_matching_sketches(self, c, hashes, max_hash): @@ -529,11 +579,14 @@ def _get_matching_sketches(self, c, hashes, max_hash): because it slows things down in practice. """ c.execute("DROP TABLE IF EXISTS sourmash_hash_query") - c.execute("CREATE TEMPORARY TABLE sourmash_hash_query (hashval INTEGER PRIMARY KEY)") + c.execute( + "CREATE TEMPORARY TABLE sourmash_hash_query (hashval INTEGER PRIMARY KEY)" + ) - hashvals = [ (convert_hash_to(h),) for h in hashes ] - c.executemany("INSERT OR IGNORE INTO sourmash_hash_query (hashval) VALUES (?)", - hashvals) + hashvals = [(convert_hash_to(h),) for h in hashes] + c.executemany( + "INSERT OR IGNORE INTO sourmash_hash_query (hashval) VALUES (?)", hashvals + ) # # set up SELECT conditions @@ -550,15 +603,18 @@ def _get_matching_sketches(self, c, hashes, max_hash): template_values.append(max_hash) # format conditions - conditions.append('sourmash_hashes.hashval=sourmash_hash_query.hashval') + conditions.append("sourmash_hashes.hashval=sourmash_hash_query.hashval") conditions = " AND ".join(conditions) - c.execute(f""" + c.execute( + f""" SELECT DISTINCT sourmash_hashes.sketch_id,COUNT(sourmash_hashes.hashval) as CNT FROM sourmash_hashes, sourmash_hash_query WHERE {conditions} GROUP BY sourmash_hashes.sketch_id ORDER BY CNT DESC - """, template_values) + """, + template_values, + ) return c @@ -578,6 +634,7 @@ class SqliteCollectionManifest(BaseCollectionManifest): In the latter case, the SqliteCollectionManifest is created with managed_by_index set to True. """ + def __init__(self, conn, *, selection_dict=None, managed_by_index=False): """ Here, 'conn' should already be connected and configured. @@ -617,8 +674,9 @@ def create_or_open(cls, filename): @classmethod def load_from_manifest(cls, manifest, *, dbfile=":memory:", append=False): "Create a new sqlite manifest from an existing manifest object." 
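    # Typical call (illustrative; `mf` is a hypothetical in-memory
    # CollectionManifest). The default dbfile=":memory:" is transient,
    # so pass a real path to persist:
    #
    #   sql_mf = SqliteCollectionManifest.load_from_manifest(mf, dbfile="mf.sqldb")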
- return cls._create_manifest_from_rows(manifest.rows, location=dbfile, - append=append) + return cls._create_manifest_from_rows( + manifest.rows, location=dbfile, append=append + ) @classmethod def create_manifest(cls, locations_iter, *, include_signature=False): @@ -629,10 +687,10 @@ def create_manifest(cls, locations_iter, *, include_signature=False): Note: do NOT catch exceptions here, so this passes through load excs. Note: this method ignores 'include_signature'. """ + def rows_iter(): for ss, location in locations_iter: - row = cls.make_manifest_row(ss, location, - include_signature=False) + row = cls.make_manifest_row(ss, location, include_signature=False) yield row return cls._create_manifest_from_rows(rows_iter()) @@ -643,8 +701,9 @@ def _create_tables(cls, cursor): # this is a class method so that it can be used by SqliteIndex to # create manifest-compatible tables. - sqlite_utils.add_sourmash_internal(cursor, 'SqliteManifest', '1.0') - cursor.execute(""" + sqlite_utils.add_sourmash_internal(cursor, "SqliteManifest", "1.0") + cursor.execute( + """ CREATE TABLE sourmash_sketches (id INTEGER PRIMARY KEY, name TEXT, @@ -660,7 +719,8 @@ def _create_tables(cls, cursor): internal_location TEXT, UNIQUE(internal_location, md5sum) ) - """) + """ + ) def add_row(self, row): c = self.conn.cursor() @@ -674,18 +734,21 @@ def _insert_row(self, cursor, row, *, call_is_from_index=False): raise Exception("must use SqliteIndex.insert to add to this manifest") row = dict(row) - if 'seed' not in row: - row['seed'] = 42 + if "seed" not in row: + row["seed"] = 42 - cursor.execute(""" + cursor.execute( + """ INSERT OR IGNORE INTO sourmash_sketches (name, num, scaled, ksize, filename, md5sum, moltype, seed, n_hashes, with_abundance, internal_location) VALUES (:name, :num, :scaled, :ksize, :filename, :md5, :moltype, :seed, :n_hashes, :with_abundance, - :internal_location)""", row) + :internal_location)""", + row, + ) - self._num_rows = None # reset cache + self._num_rows = None # reset cache def __bool__(self): "Is this manifest empty?" @@ -700,7 +763,7 @@ def __bool__(self): def __eq__(self, other): "Check equality on a row-by-row basis. May fail on out-of-order rows." - for (a, b) in itertools.zip_longest(self.rows, other.rows): + for a, b in itertools.zip_longest(self.rows, other.rows): # ignore non-required keys. 
for k in self.required_keys: if a[k] != b[k]: @@ -749,21 +812,21 @@ def _make_select(self): picklist = None if self.selection_dict: select_d = self.selection_dict - if 'ksize' in select_d and select_d['ksize']: + if "ksize" in select_d and select_d["ksize"]: conditions.append("sourmash_sketches.ksize = ?") - values.append(select_d['ksize']) - if 'num' in select_d and select_d['num'] > 0: + values.append(select_d["ksize"]) + if "num" in select_d and select_d["num"] > 0: conditions.append("sourmash_sketches.num > 0") - if 'scaled' in select_d and select_d['scaled'] > 0: + if "scaled" in select_d and select_d["scaled"] > 0: conditions.append("sourmash_sketches.scaled > 0") - if 'containment' in select_d and select_d['containment']: + if "containment" in select_d and select_d["containment"]: conditions.append("sourmash_sketches.scaled > 0") - if 'moltype' in select_d and select_d['moltype'] is not None: - moltype = select_d['moltype'] - assert moltype in ('DNA', 'protein', 'dayhoff', 'hp'), moltype + if "moltype" in select_d and select_d["moltype"] is not None: + moltype = select_d["moltype"] + assert moltype in ("DNA", "protein", "dayhoff", "hp"), moltype conditions.append(f"sourmash_sketches.moltype = '{moltype}'") - picklist = select_d.get('picklist') + picklist = select_d.get("picklist") return conditions, values, picklist @@ -784,10 +847,10 @@ def select_to_manifest(self, **kwargs): new_mf = SqliteCollectionManifest(self.conn, selection_dict=kwargs) # if picklist, make sure we fill in 'found'. - picklist = kwargs.get('picklist') + picklist = kwargs.get("picklist") if picklist is not None: debug_literal("sqlite manifest: iterating through picklist") - _ = len(self) # this forces iteration through rows. + _ = len(self) # this forces iteration through rows. return new_mf @@ -803,19 +866,43 @@ def rows(self): conditions = "" debug_literal(f"sqlite manifest rows: executing select with '{conditions}'") - c1.execute(f""" + c1.execute( + f""" SELECT id, name, md5sum, num, scaled, ksize, filename, moltype, seed, n_hashes, internal_location FROM sourmash_sketches {conditions} - """, values) + """, + values, + ) debug_literal("sqlite manifest: entering row yield loop") - for (_id, name, md5sum, num, scaled, ksize, filename, moltype, - seed, n_hashes, iloc) in c1: - row = dict(num=num, scaled=scaled, name=name, filename=filename, - n_hashes=n_hashes, with_abundance=0, ksize=ksize, - md5=md5sum, internal_location=iloc, - moltype=moltype, md5short=md5sum[:8], - seed=seed, _id=_id) + for ( + _id, + name, + md5sum, + num, + scaled, + ksize, + filename, + moltype, + seed, + n_hashes, + iloc, + ) in c1: + row = dict( + num=num, + scaled=scaled, + name=name, + filename=filename, + n_hashes=n_hashes, + with_abundance=0, + ksize=ksize, + md5=md5sum, + internal_location=iloc, + moltype=moltype, + md5short=md5sum[:8], + seed=seed, + _id=_id, + ) if picklist is None or picklist.matches_manifest_row(row): yield row @@ -824,6 +911,7 @@ def filter_rows(self, row_filter_fn): This is done in memory, inserting each row one at a time. """ + def rows_iter(): for row in self.rows: if row_filter_fn(row): @@ -833,9 +921,11 @@ def rows_iter(): def filter_on_columns(self, col_filter_fn, col_names): "Create a new manifest based on column matches." 
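        # Illustrative usage, with a hypothetical manifest `mf`: keep only
        # rows whose name mentions a given accession prefix. Note that
        # `col_filter_fn` receives a list of the non-None column values:
        #
        #   new_mf = mf.filter_on_columns(
        #       lambda vals: any("GCF_" in v for v in vals), ["name"]
        #   )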
+ def row_filter_fn(row): - x = [ row[col] for col in col_names if row[col] is not None ] + x = [row[col] for col in col_names if row[col] is not None] return col_filter_fn(x) + return self.filter_rows(row_filter_fn) def locations(self): @@ -856,20 +946,22 @@ def locations(self): else: conditions = "" - c1.execute(f""" + c1.execute( + f""" SELECT DISTINCT internal_location FROM sourmash_sketches {conditions} - """, values) + """, + values, + ) - return ( iloc for iloc, in c1 ) + return (iloc for (iloc,) in c1) def __contains__(self, ss): "Check to see if signature 'ss' is in this manifest." md5 = ss.md5sum() c = self.conn.cursor() - c.execute('SELECT COUNT(*) FROM sourmash_sketches WHERE md5sum=?', - (md5,)) - val, = c.fetchone() + c.execute("SELECT COUNT(*) FROM sourmash_sketches WHERE md5sum=?", (md5,)) + (val,) = c.fetchone() if bool(val): picklist = self.picklist @@ -880,18 +972,19 @@ def __contains__(self, ss): def picklist(self): "Return the picklist, if any." if self.selection_dict: - return self.selection_dict.get('picklist') + return self.selection_dict.get("picklist") return None def to_picklist(self): "Convert this manifest to a picklist." - pl = SignaturePicklist('manifest') - pl.pickset = { pl._get_value_for_manifest_row(row) for row in self.rows } + pl = SignaturePicklist("manifest") + pl.pickset = {pl._get_value_for_manifest_row(row) for row in self.rows} return pl @classmethod - def _create_manifest_from_rows(cls, rows_iter, *, location=":memory:", - append=False): + def _create_manifest_from_rows( + cls, rows_iter, *, location=":memory:", append=False + ): """Create a SqliteCollectionManifest from a rows iterator. Internal utility function. @@ -903,7 +996,9 @@ def _create_manifest_from_rows(cls, rows_iter, *, location=":memory:", mf = cls.create(location) except (sqlite3.OperationalError, sqlite3.DatabaseError) as exc: if not append: - raise Exception(f"cannot create sqlite3 db at '{location}'; exception: {str(exc)}") + raise Exception( + f"cannot create sqlite3 db at '{location}'; exception: {str(exc)}" + ) db = load_sqlite_index(location, request_manifest=True) mf = db.manifest @@ -920,6 +1015,7 @@ class LCA_SqliteDatabase(SqliteIndex): """ A wrapper class for SqliteIndex + lineage db => LCA_Database functionality. """ + is_database = True def __init__(self, dbfile, *, lineage_db=None, sqlite_manifest=None): @@ -929,10 +1025,12 @@ def __init__(self, dbfile, *, lineage_db=None, sqlite_manifest=None): c = self.conn.cursor() - c.execute('SELECT DISTINCT ksize, moltype FROM sourmash_sketches') + c.execute("SELECT DISTINCT ksize, moltype FROM sourmash_sketches") res = list(c) if len(res) > 1: - raise TypeError("can only have one ksize & moltype in an LCA_SqliteDatabase") + raise TypeError( + "can only have one ksize & moltype in an LCA_SqliteDatabase" + ) if len(res) == 0: raise ValueError("cannot load an LCA_SqliteDatabase") @@ -996,20 +1094,20 @@ def _build_index(self): lid_to_lineage = {} for row in mf.rows: - name = row['name'] + name = row["name"] if name: # this is a bit of a hack. we try identifiers _with_ and # _without_ versions, and take whichever works. There is # definitely a better way to do this, but I can't think # of one right now. 
- ident = name.split(' ')[0] + ident = name.split(" ")[0] - lineage = lineage_db.get(ident) # try with identifier version - if lineage is None: # nope - remove version.x - ident = name.split('.')[0] + lineage = lineage_db.get(ident) # try with identifier version + if lineage is None: # nope - remove version.x + ident = name.split(".")[0] lineage = lineage_db.get(ident) - idx = row['_id'] # this is only present in sqlite manifests. + idx = row["_id"] # this is only present in sqlite manifests. ident_to_idx[ident] = idx if lineage: @@ -1038,16 +1136,16 @@ def insert(self, *args, **kwargs): def select(self, *args, **kwargs): sqlite_manifest = self._select(*args, **kwargs) - return LCA_SqliteDatabase(self.dbfile, - sqlite_manifest=sqlite_manifest, - lineage_db=self.lineage_db) + return LCA_SqliteDatabase( + self.dbfile, sqlite_manifest=sqlite_manifest, lineage_db=self.lineage_db + ) ### LCA_Database API/protocol. def downsample_scaled(self, scaled): "Downsample the scaled for querying." if scaled < self.scaled: - raise ValueError("cannot decrease scaled from {} to {}".format(self.scaled, scaled)) + raise ValueError(f"cannot decrease scaled from {self.scaled} to {scaled}") # CTB: maybe return a new LCA_Database? Right now this isn't how # the lca_db protocol works tho. @@ -1097,17 +1195,18 @@ def get_identifiers_for_hashval(self, hashval): class _SqliteIndexHashvalToIndex: """ - Internal wrapper class to retrieve keys and key/value pairs for + Internal wrapper class to retrieve keys and key/value pairs for hashval -> [ list of idx ]. """ + def __init__(self, sqlidx): self.sqlidx = sqlidx def __iter__(self): "Get all hashvals." c = self.sqlidx.conn.cursor() - c.execute('SELECT DISTINCT hashval FROM sourmash_hashes') - for hashval, in c: + c.execute("SELECT DISTINCT hashval FROM sourmash_hashes") + for (hashval,) in c: yield hashval def get(self, key, dv=None): @@ -1117,10 +1216,9 @@ def get(self, key, dv=None): hh = convert_hash_to(key) - c.execute('SELECT sketch_id FROM sourmash_hashes WHERE hashval=?', - (hh,)) + c.execute("SELECT sketch_id FROM sourmash_hashes WHERE hashval=?", (hh,)) - x = [ convert_hash_from(h) for h, in c ] + x = [convert_hash_from(h) for (h,) in c] return x or dv def __getitem__(self, key): diff --git a/src/sourmash/lca/__init__.py b/src/sourmash/lca/__init__.py index b2a9af2589..82b468c424 100644 --- a/src/sourmash/lca/__init__.py +++ b/src/sourmash/lca/__init__.py @@ -1,13 +1,18 @@ "LCA and reverse index utilities." from .lca_db import LCA_Database -from .lca_utils import (taxlist, zip_lineage, build_tree, find_lca, - gather_assignments, display_lineage, - count_lca_for_assignments) +from .lca_utils import ( + taxlist, + zip_lineage, + build_tree, + find_lca, + gather_assignments, + display_lineage, + count_lca_for_assignments, +) from .command_index import index from .command_classify import classify from .command_summarize import summarize_main from .command_rankinfo import rankinfo_main from .__main__ import main - diff --git a/src/sourmash/lca/__main__.py b/src/sourmash/lca/__main__.py index b02b891771..73faa36019 100644 --- a/src/sourmash/lca/__main__.py +++ b/src/sourmash/lca/__main__.py @@ -9,7 +9,7 @@ from .command_compare_csv import compare_csv from ..logging import set_quiet, error -usage=''' +usage = """ sourmash lca [] - work with taxonomic information. ** Commands can be: @@ -23,14 +23,15 @@ ** Use '-h' to get subcommand-specific help, e.g. 
sourmash lca index -h -''' +""" + def main(arglist=None): args = sourmash.cli.get_parser().parse_args(arglist) submod = getattr(sourmash.cli.sig, args.subcmd) - mainmethod = getattr(submod, 'main') + mainmethod = getattr(submod, "main") return mainmethod(args) -if __name__ == '__main__': +if __name__ == "__main__": main(sys.argv) diff --git a/src/sourmash/lca/command_classify.py b/src/sourmash/lca/command_classify.py index cf5605be72..4ea5ae69ec 100644 --- a/src/sourmash/lca/command_classify.py +++ b/src/sourmash/lca/command_classify.py @@ -11,7 +11,7 @@ from . import lca_utils from .lca_utils import check_files_exist -DEFAULT_THRESHOLD=5 # how many counts of a taxid at min +DEFAULT_THRESHOLD = 5 # how many counts of a taxid at min def classify_signature(query_sig, dblist, threshold, majority): @@ -33,10 +33,9 @@ def classify_signature(query_sig, dblist, threshold, majority): shows up, and filter out low-abundance ones (under threshold). Then, determine the LCA of all of those. - """ + """ # gather assignments from across all the databases - assignments = lca_utils.gather_assignments(query_sig.minhash.hashes, - dblist) + assignments = lca_utils.gather_assignments(query_sig.minhash.hashes, dblist) # now convert to trees -> do LCA & counts counts = lca_utils.count_lca_for_assignments(assignments) @@ -59,20 +58,20 @@ def classify_signature(query_sig, dblist, threshold, majority): # update tree with this set of assignments lca_utils.build_tree([lca], tree) - status = 'nomatch' + status = "nomatch" if not tree: return [], status # now find lowest-common-ancestor of the resulting tree. lca, reason = lca_utils.find_lca(tree) - if reason == 0: # leaf node - debug('END', lca) - status = 'found' - else: # internal node => disagreement - debug('MULTI', lca) - status = 'disagree' + if reason == 0: # leaf node + debug("END", lca) + status = "found" + else: # internal node => disagreement + debug("MULTI", lca) + status = "disagree" - debug('lineage is:', lca) + debug("lineage is:", lca) return lca, status @@ -82,7 +81,7 @@ def classify(args): main single-genome classification function. """ if not args.db: - error('Error! must specify at least one LCA database with --db') + error("Error! must specify at least one LCA database with --db") sys.exit(-1) set_quiet(args.quiet, args.debug) @@ -98,7 +97,7 @@ def classify(args): dblist, ksize, scaled = lca_utils.load_databases(args.db, args.scaled) # find all the queries - notify('finding query signatures...') + notify("finding query signatures...") inp_files = list(args.query) if args.query_from_file: more_files = sourmash_args.load_pathlist_from_file(args.query_from_file) @@ -108,7 +107,9 @@ def classify(args): sys.exit(-1) if not inp_files: - error('Error! must specify at least one query signature with --query or --query-from-file') + error( + "Error! must specify at least one query signature with --query or --query-from-file" + ) sys.exit(-1) # set up output @@ -117,7 +118,7 @@ def classify(args): with sourmash_args.FileOutputCSV(args.output) as outfp: csvfp = csv.writer(outfp) - csvfp.writerow(['ID','status'] + list(lca_utils.taxlist())) + csvfp.writerow(["ID", "status"] + list(lca_utils.taxlist())) # for each query, gather all the matches across databases total_count = 0 @@ -125,11 +126,10 @@ def classify(args): total_n = len(inp_files) for query_filename in inp_files: n += 1 - for query_sig in load_file_as_signatures(query_filename, - ksize=ksize): - notify(u'\r\033[K', end=u'') - notify(f'... 
classifying {query_sig} (file {n} of {total_n})', end='\r') - debug('classifying', query_sig) + for query_sig in load_file_as_signatures(query_filename, ksize=ksize): + notify("\r\033[K", end="") + notify(f"... classifying {query_sig} (file {n} of {total_n})", end="\r") + debug("classifying", query_sig) total_count += 1 # make sure we're looking at the same scaled value as database @@ -139,8 +139,9 @@ def classify(args): query_sig.minhash = downsample_mh # do the classification - lineage, status = classify_signature(query_sig, dblist, - args.threshold, args.majority) + lineage, status = classify_signature( + query_sig, dblist, args.threshold, args.majority + ) debug(lineage) # output each classification to the spreadsheet @@ -149,12 +150,12 @@ def classify(args): # when outputting to stdout, make output intelligible if not args.output: - notify(u'\r\033[K', end=u'') + notify("\r\033[K", end="") csvfp.writerow(row) - notify(u'\r\033[K', end=u'') - notify(f'classified {total_count} signatures total') + notify("\r\033[K", end="") + notify(f"classified {total_count} signatures total") -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(classify(sys.argv[1:])) diff --git a/src/sourmash/lca/command_compare_csv.py b/src/sourmash/lca/command_compare_csv.py index 99b7f8211a..c8018256f0 100644 --- a/src/sourmash/lca/command_compare_csv.py +++ b/src/sourmash/lca/command_compare_csv.py @@ -13,44 +13,50 @@ def compare_csv(args): if args.start_column < 2: - error('error, --start-column cannot be less than 2') + error("error, --start-column cannot be less than 2") sys.exit(-1) set_quiet(args.quiet, args.debug) # first, load classify-style spreadsheet - notify(f'loading classify output from: {args.csv1}') - assignments0, num_rows0 = load_taxonomy_assignments(args.csv1, - start_column=3, - force=args.force) + notify(f"loading classify output from: {args.csv1}") + assignments0, num_rows0 = load_taxonomy_assignments( + args.csv1, start_column=3, force=args.force + ) - notify(f'loaded {len(set(assignments0.values()))} distinct lineages, {num_rows0} rows') - notify('----') + notify( + f"loaded {len(set(assignments0.values()))} distinct lineages, {num_rows0} rows" + ) + notify("----") # next, load custom taxonomy spreadsheet - delimiter = ',' + delimiter = "," if args.tabs: - delimiter = '\t' + delimiter = "\t" - notify(f'loading custom spreadsheet from: {args.csv2}') - assignments, num_rows = load_taxonomy_assignments(args.csv2, - delimiter=delimiter, - start_column=args.start_column, - use_headers=not args.no_headers, - force=args.force) - notify(f'loaded {len(set(assignments.values()))} distinct lineages, {num_rows} rows') + notify(f"loading custom spreadsheet from: {args.csv2}") + assignments, num_rows = load_taxonomy_assignments( + args.csv2, + delimiter=delimiter, + start_column=args.start_column, + use_headers=not args.no_headers, + force=args.force, + ) + notify( + f"loaded {len(set(assignments.values()))} distinct lineages, {num_rows} rows" + ) # now, compute basic differences: missing_1 = set(assignments0.keys()) - set(assignments.keys()) missing_2 = set(assignments.keys()) - set(assignments0.keys()) if missing_2: - notify(f'missing {len(missing_2)} assignments in classify spreadsheet.') + notify(f"missing {len(missing_2)} assignments in classify spreadsheet.") if missing_1: - notify(f'missing {len(missing_1)} assignments in custom spreadsheet.') + notify(f"missing {len(missing_1)} assignments in custom spreadsheet.") if missing_1 or missing_2: - notify('(these will not be evaluated any 
further)') + notify("(these will not be evaluated any further)") else: - notify('note: all IDs are in both spreadsheets!') + notify("note: all IDs are in both spreadsheets!") # next, look at differences in lineages common = set(assignments0.keys()) @@ -71,7 +77,7 @@ def compare_csv(args): lca_utils.build_tree([v1], tree) lca, reason = lca_utils.find_lca(tree) - if reason == 0: # compatible lineages + if reason == 0: # compatible lineages n_compat += 1 print_results("{},compatible,{}", k, ";".join(zip_lineage(lca))) else: @@ -88,8 +94,8 @@ def compare_csv(args): if n_incompat: for rank in lca_utils.taxlist(): - notify(f'{incompat_rank[rank]} incompatible at rank {rank}') - + notify(f"{incompat_rank[rank]} incompatible at rank {rank}") -if __name__ == '__main__': + +if __name__ == "__main__": sys.exit(compare_csv(sys.argv[1:])) diff --git a/src/sourmash/lca/command_index.py b/src/sourmash/lca/command_index.py index 3ee13164a8..f75a0ec8f2 100644 --- a/src/sourmash/lca/command_index.py +++ b/src/sourmash/lca/command_index.py @@ -15,10 +15,16 @@ from sourmash.sourmash_args import DEFAULT_LOAD_K -def load_taxonomy_assignments(filename, *, delimiter=',', start_column=2, - use_headers=True, force=False, - split_identifiers=False, - keep_identifier_versions=False): +def load_taxonomy_assignments( + filename, + *, + delimiter=",", + start_column=2, + use_headers=True, + force=False, + split_identifiers=False, + keep_identifier_versions=False, +): """ Load a taxonomy assignment spreadsheet into a dictionary. @@ -26,34 +32,35 @@ def load_taxonomy_assignments(filename, *, delimiter=',', start_column=2, lineage tuples. """ from sourmash.tax.tax_utils import LineagePair + # parse spreadsheet! # CTB note: can't easily switch to FileInputCSV, because of # janky way we do/don't handle headers here. See issue #2198. - fp = open(filename, newline='') + fp = open(filename, newline="") r = csv.reader(fp, delimiter=delimiter) - row_headers = ['identifiers'] - row_headers += ['_skip_']*(start_column - 2) + row_headers = ["identifiers"] + row_headers += ["_skip_"] * (start_column - 2) row_headers += list(lca_utils.taxlist()) # first check that headers are interpretable. if use_headers: - notify('examining spreadsheet headers...') + notify("examining spreadsheet headers...") first_row = next(iter(r)) n_disagree = 0 - for (column, value) in zip(row_headers, first_row): - if column == '_skip_': + for column, value in zip(row_headers, first_row): + if column == "_skip_": continue if column.lower() != value.lower(): notify(f"** assuming column '{value}' is {column} in spreadsheet") n_disagree += 1 if n_disagree > 2: - error('whoa, too many assumptions. are the headers right?') - error('expecting {}', ",".join(row_headers)) + error("whoa, too many assumptions. are the headers right?") + error("expecting {}", ",".join(row_headers)) if not force: sys.exit(-1) - notify('...continue, because --force was specified.') + notify("...continue, because --force was specified.") # convert into a lineage pair assignments = {} @@ -61,27 +68,27 @@ def load_taxonomy_assignments(filename, *, delimiter=',', start_column=2, n_species = 0 n_strains = 0 for row in r: - if row and row[0].strip(): # want non-empty row + if row and row[0].strip(): # want non-empty row num_rows += 1 lineage = list(zip(row_headers, row)) - lineage = [ x for x in lineage if x[0] != '_skip_' ] + lineage = [x for x in lineage if x[0] != "_skip_"] ident = lineage[0][1] lineage = lineage[1:] # fold, spindle, and mutilate ident? 
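            # A minimal sketch of the munging below, with a hypothetical
            # NCBI-style accession (values illustrative only):
            #   >>> ident = "GCF_000005845.2 Escherichia coli"
            #   >>> ident.split(" ")[0]
            #   'GCF_000005845.2'
            #   >>> ident.split(" ")[0].split(".")[0]
            #   'GCF_000005845'
            # i.e. split_identifiers keeps only the first token, and
            # keep_identifier_versions=False also drops the ".2" suffix.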
if split_identifiers: - ident = ident.split(' ')[0] + ident = ident.split(" ")[0] if not keep_identifier_versions: - ident = ident.split('.')[0] + ident = ident.split(".")[0] # clean lineage of null names, replace with 'unassigned' - lineage = [ (a, lca_utils.filter_null(b)) for (a,b) in lineage ] - lineage = [ LineagePair(a, b) for (a, b) in lineage ] + lineage = [(a, lca_utils.filter_null(b)) for (a, b) in lineage] + lineage = [LineagePair(a, b) for (a, b) in lineage] # remove end nulls - while lineage and lineage[-1].name == 'unassigned': + while lineage and lineage[-1].name == "unassigned": lineage = lineage[:-1] # store lineage tuple @@ -90,13 +97,13 @@ def load_taxonomy_assignments(filename, *, delimiter=',', start_column=2, if ident in assignments: if assignments[ident] != tuple(lineage): if not force: - raise Exception("multiple lineages for identifier {}".format(ident)) + raise Exception(f"multiple lineages for identifier {ident}") else: assignments[ident] = tuple(lineage) - if lineage[-1].rank == 'species': + if lineage[-1].rank == "species": n_species += 1 - elif lineage[-1].rank == 'strain': + elif lineage[-1].rank == "strain": n_species += 1 n_strains += 1 @@ -106,35 +113,50 @@ def load_taxonomy_assignments(filename, *, delimiter=',', start_column=2, # any more, when building a large GTDB-based database :) --CTB if len(assignments) * 0.2 > n_species and len(assignments) > 50: if not force: - error('') + error("") error("ERROR: fewer than 20% of lineages have species-level resolution!?") - error("({} species assignments found, of {} assignments total)", - n_species, len(assignments)) + error( + "({} species assignments found, of {} assignments total)", + n_species, + len(assignments), + ) error("** If this is intentional, re-run the command with -f.") sys.exit(-1) return assignments, num_rows -def generate_report(record_duplicates, record_no_lineage, record_remnants, - unused_lineages, unused_identifiers, filename): +def generate_report( + record_duplicates, + record_no_lineage, + record_remnants, + unused_lineages, + unused_identifiers, + filename, +): """ Output a report of anomalies from building the index. """ - with open(filename, 'wt') as fp: - print(f'Duplicate signatures: {len(record_duplicates)}', file=fp) + with open(filename, "w") as fp: + print(f"Duplicate signatures: {len(record_duplicates)}", file=fp) fp.write("\n".join(record_duplicates)) fp.write("\n") - print(f'----\nUnused identifiers: {len(unused_identifiers)}', file=fp) + print(f"----\nUnused identifiers: {len(unused_identifiers)}", file=fp) fp.write("\n".join(unused_identifiers)) fp.write("\n") - print(f'----\nNo lineage provided for these identifiers: {len(record_no_lineage)}', file=fp) + print( + f"----\nNo lineage provided for these identifiers: {len(record_no_lineage)}", + file=fp, + ) fp.write("\n".join(record_no_lineage)) fp.write("\n") - print(f'----\nNo signatures found for these identifiers: {len(record_remnants)}', file=fp) - fp.write('\n'.join(record_remnants)) + print( + f"----\nNo signatures found for these identifiers: {len(record_remnants)}", + file=fp, + ) + fp.write("\n".join(record_remnants)) fp.write("\n") - print(f'----\nUnused lineages: {len(unused_lineages)}', file=fp) + print(f"----\nUnused lineages: {len(unused_lineages)}", file=fp) for lineage in unused_lineages: fp.write(";".join(lca_utils.zip_lineage(lineage))) fp.write("\n") @@ -145,7 +167,7 @@ def index(args): main function for building an LCA database. 
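    For example, from the command line (hypothetical file names):

        sourmash lca index taxonomy.csv out.lca.json sig1.sig sig2.sig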
""" if args.start_column < 2: - error('error, --start-column cannot be less than 2') + error("error, --start-column cannot be less than 2") sys.exit(-1) set_quiet(args.quiet, args.debug) @@ -155,42 +177,50 @@ def index(args): if args.ksize is None: args.ksize = DEFAULT_LOAD_K - moltype = sourmash_args.calculate_moltype(args, default='DNA') + moltype = sourmash_args.calculate_moltype(args, default="DNA") picklist = sourmash_args.load_picklist(args) db_outfile = args.lca_db_out - if args.database_format == 'json': - if not (db_outfile.endswith('.lca.json') or \ - db_outfile.endswith('.lca.json.gz')): # logic -> db.save - db_outfile += '.lca.json' + if args.database_format == "json": + if not ( + db_outfile.endswith(".lca.json") or db_outfile.endswith(".lca.json.gz") + ): # logic -> db.save + db_outfile += ".lca.json" else: - assert args.database_format == 'sql' - if not db_outfile.endswith('.lca.sql'): - db_outfile += '.lca.sql' + assert args.database_format == "sql" + if not db_outfile.endswith(".lca.sql"): + db_outfile += ".lca.sql" if os.path.exists(db_outfile): error(f"ERROR: output file {db_outfile} already exists. Not overwriting.") sys.exit(-1) - notify(f'saving to LCA DB: {format(db_outfile)}') + notify(f"saving to LCA DB: {format(db_outfile)}") - notify(f'Building LCA database with ksize={args.ksize} scaled={args.scaled} moltype={moltype}.') + notify( + f"Building LCA database with ksize={args.ksize} scaled={args.scaled} moltype={moltype}." + ) # first, load taxonomy spreadsheet - delimiter = ',' + delimiter = "," if args.tabs: - delimiter = '\t' - assignments, num_rows = load_taxonomy_assignments(args.csv, - delimiter=delimiter, - start_column=args.start_column, - use_headers=not args.no_headers, - force=args.force, - split_identifiers=args.split_identifiers, - keep_identifier_versions=args.keep_identifier_versions + delimiter = "\t" + assignments, num_rows = load_taxonomy_assignments( + args.csv, + delimiter=delimiter, + start_column=args.start_column, + use_headers=not args.no_headers, + force=args.force, + split_identifiers=args.split_identifiers, + keep_identifier_versions=args.keep_identifier_versions, ) - notify(f'{len(assignments)} distinct identities in spreadsheet out of {num_rows} rows.') - notify(f'{len(set(assignments.values()))} distinct lineages in spreadsheet out of {num_rows} rows.') + notify( + f"{len(assignments)} distinct identities in spreadsheet out of {num_rows} rows." + ) + notify( + f"{len(set(assignments.values()))} distinct lineages in spreadsheet out of {num_rows} rows." + ) db = LCA_Database(args.ksize, args.scaled, moltype) @@ -216,18 +246,28 @@ def index(args): n_skipped = 0 for filename in inp_files: n += 1 - it = load_file_as_signatures(filename, ksize=args.ksize, - select_moltype=moltype, - picklist=picklist, - yield_all_files=args.force) + it = load_file_as_signatures( + filename, + ksize=args.ksize, + select_moltype=moltype, + picklist=picklist, + yield_all_files=args.force, + ) for sig in it: - notify(u'\r\033[K', end=u'') - notify(f'\r... loading signature {str(sig)[:30]} ({n} of {total_n}); skipped {n_skipped} so far', end='') + notify("\r\033[K", end="") + notify( + f"\r... loading signature {str(sig)[:30]} ({n} of {total_n}); skipped {n_skipped} so far", + end="", + ) debug(filename, sig) # block off duplicates. 
if sig.md5sum() in md5_to_name: - debug('WARNING: in file {}, duplicate md5sum: {}; skipping', filename, sig.md5sum()) + debug( + "WARNING: in file {}, duplicate md5sum: {}; skipping", + filename, + sig.md5sum(), + ) record_duplicates.add(sig.name) continue @@ -240,13 +280,13 @@ def index(args): ident = sig.filename orig_ident = ident - if args.split_identifiers: # hack for NCBI-style names, etc. + if args.split_identifiers: # hack for NCBI-style names, etc. # split on space... - ident = ident.split(' ')[0] + ident = ident.split(" ")[0] if not args.keep_identifier_versions: # ...and on period. - ident = ident.split('.')[0] + ident = ident.split(".")[0] lineage = assignments.get(ident) @@ -257,7 +297,7 @@ def index(args): if args.split_identifiers: notify(f"(Identifier extracted from name: '{orig_ident})')") sys.exit(-1) - debug('(skipping, because --require-taxonomy was specified)') + debug("(skipping, because --require-taxonomy was specified)") n_skipped += 1 continue @@ -265,8 +305,12 @@ def index(args): try: db.insert(sig, ident=ident, lineage=lineage) except ValueError as e: - error("ERROR: cannot insert signature '{}' (md5 {}, loaded from '{}') into database.", - sig, sig.md5sum()[:8], filename) + error( + "ERROR: cannot insert signature '{}' (md5 {}, loaded from '{}') into database.", + sig, + sig.md5sum()[:8], + filename, + ) error("ERROR: {}", str(e)) sys.exit(-1) @@ -280,35 +324,43 @@ def index(args): # track lineage info - either no lineage, or this lineage used. else: - debug('WARNING: no lineage assignment for {}.', ident) + debug("WARNING: no lineage assignment for {}.", ident) record_no_lineage.append(ident) # end main add signatures loop if n_skipped: - notify(f'... loaded {total_n} signatures; skipped {n_skipped} because of --require-taxonomy.') + notify( + f"... loaded {total_n} signatures; skipped {n_skipped} because of --require-taxonomy." + ) else: - notify(f'... loaded {total_n} signatures.') + notify(f"... loaded {total_n} signatures.") # check -- did we find any signatures? if n == 0: - error('ERROR: no signatures found. ??') + error("ERROR: no signatures found. ??") sys.exit(1) # check -- did the signatures we found have any hashes? if not db.hashvals: - error('ERROR: no hash values found - are there any signatures?') + error("ERROR: no hash values found - are there any signatures?") sys.exit(1) - notify(f'loaded {len(db.hashvals)} hashes at ksize={args.ksize} scaled={args.scaled}') + notify( + f"loaded {len(db.hashvals)} hashes at ksize={args.ksize} scaled={args.scaled}" + ) if picklist: sourmash_args.report_picklist(args, picklist) # summarize: - notify(f'{len(record_used_lineages)} assigned lineages out of {len(set(assignments.values()))} distinct lineages in spreadsheet.') + notify( + f"{len(record_used_lineages)} assigned lineages out of {len(set(assignments.values()))} distinct lineages in spreadsheet." + ) unused_lineages = set(assignments.values()) - record_used_lineages - notify(f'{len(record_used_idents)} identifiers used out of {len(set(assignments))} distinct identifiers in spreadsheet.') + notify( + f"{len(record_used_idents)} identifiers used out of {len(set(assignments))} distinct identifiers in spreadsheet." 
+    )

     assert record_used_idents.issubset(set(assignments))
     unused_identifiers = set(assignments) - record_used_idents

@@ -321,25 +373,34 @@ def index(args):
     # output a record of stuff if requested/available:
     if record_duplicates or record_no_lineage or record_remnants or unused_lineages:
         if record_duplicates:
-            notify(f'WARNING: {len(record_duplicates)} duplicate signatures.')
+            notify(f"WARNING: {len(record_duplicates)} duplicate signatures.")
         if record_no_lineage:
-            notify(f'WARNING: no lineage provided for {len(record_no_lineage)} signatures.')
+            notify(
+                f"WARNING: no lineage provided for {len(record_no_lineage)} signatures."
+            )
         if record_remnants:
-            notify(f'WARNING: no signatures for {len(record_remnants)} spreadsheet rows.')
+            notify(
+                f"WARNING: no signatures for {len(record_remnants)} spreadsheet rows."
+            )
         if unused_lineages:
-            notify(f'WARNING: {len(unused_lineages)} unused lineages.')
+            notify(f"WARNING: {len(unused_lineages)} unused lineages.")
         if unused_identifiers:
-            notify(f'WARNING: {len(unused_identifiers)} unused identifiers.')
+            notify(f"WARNING: {len(unused_identifiers)} unused identifiers.")

         if args.report:
             notify(f"generating a report and saving in '{args.report}'")
-            generate_report(record_duplicates, record_no_lineage,
-                            record_remnants, unused_lineages,
-                            unused_identifiers, args.report)
+            generate_report(
+                record_duplicates,
+                record_no_lineage,
+                record_remnants,
+                unused_lineages,
+                unused_identifiers,
+                args.report,
+            )
         else:
-            notify('(You can use --report to generate a detailed report.)')
+            notify("(You can use --report to generate a detailed report.)")


-if __name__ == '__main__':
+if __name__ == "__main__":
     sys.exit(index(sys.argv[1:]))
diff --git a/src/sourmash/lca/command_rankinfo.py b/src/sourmash/lca/command_rankinfo.py
index 8cd4c95a71..af0dbfa9d9 100644
--- a/src/sourmash/lca/command_rankinfo.py
+++ b/src/sourmash/lca/command_rankinfo.py
@@ -27,7 +27,6 @@ def make_lca_counts(dblist, min_num=0):
     # now convert to trees -> do LCA & counts
     counts = defaultdict(int)
     for hashval, lineages in assignments.items():
-
         # for each list of tuple_info [(rank, name), ...] build
         # a tree that lets us discover lowest-common-ancestor.
         debug(lineages)
@@ -46,7 +45,7 @@ def rankinfo_main(args):
     rankinfo!
     """
     if not args.db:
-        error('Error! must specify at least one LCA database with --db')
+        error("Error! must specify at least one LCA database with --db")
         sys.exit(-1)

     set_quiet(args.quiet, args.debug)
@@ -74,8 +73,8 @@
     else:
         for rank in lca_utils.taxlist():
             count = counts_by_rank.get(rank, 0)
-            print('{}: {} ({:.1f}%)'.format(rank, count, count / total * 100.))
+            print(f"{rank}: {count} ({count / total * 100.0:.1f}%)")


-if __name__ == '__main__':
+if __name__ == "__main__":
     sys.exit(rankinfo_main(sys.argv[1:]))
diff --git a/src/sourmash/lca/command_summarize.py b/src/sourmash/lca/command_summarize.py
index c571d7e141..02b57e60e1 100644
--- a/src/sourmash/lca/command_summarize.py
+++ b/src/sourmash/lca/command_summarize.py
@@ -13,7 +13,7 @@
 from sourmash.index import MultiIndex


-DEFAULT_THRESHOLD=5
+DEFAULT_THRESHOLD = 5


 def summarize(hashvals, dblist, threshold, ignore_abundance):
@@ -32,7 +32,7 @@
     # now convert to trees -> do LCA & counts
     if not ignore_abundance:
         counts = lca_utils.count_lca_for_assignments(assignments, hashvals)
-    else: # flatten
+    else:  # flatten
         counts = lca_utils.count_lca_for_assignments(assignments, None)

     debug(counts.most_common())
@@ -69,9 +69,10 @@ def load_singletons_and_count(filenames, ksize, scaled, ignore_abundance):
         idx = idx.select(ksize=ksize)

         for query_sig, query_filename in idx.signatures_with_location():
-            notify(u'\r\033[K', end=u'')
-            notify(f'... loading {query_sig} (file {n} of {total_n})',
-                   total_n, end='\r')
+            notify("\r\033[K", end="")
+            notify(
+                f"... loading {query_sig} (file {n} of {total_n})", total_n, end="\r"
+            )
             total_count += 1

             if ignore_abundance and query_sig.minhash.track_abundance:
@@ -82,8 +83,8 @@
             count_signature(query_sig, scaled, hashvals)
             yield query_filename, query_sig, hashvals

-    notify(u'\r\033[K', end=u'')
-    notify(f'loaded {total_count} signatures from {n} files total.')
+    notify("\r\033[K", end="")
+    notify(f"loaded {total_count} signatures from {n} files total.")


 def count_signature(sig, scaled, hashvals):
@@ -104,32 +105,34 @@ def output_results(lineage_counts, total_counts, filename=None, sig=None):
     Output results in ~human-readable format.
     """
-    for (lineage, count) in lineage_counts.items():
+    for lineage, count in lineage_counts.items():
         if lineage:
             lineage = lca_utils.zip_lineage(lineage, truncate_empty=True)
-            lineage = ';'.join(lineage)
+            lineage = ";".join(lineage)
         else:
-            lineage = '(root)'
+            lineage = "(root)"

-        p = count / total_counts * 100.
-        p = '{:.1f}%'.format(p)
+        p = count / total_counts * 100.0
+        p = f"{p:.1f}%"

-        print_results('{:5} {:>5} {} {}:{} {}'.format(p, count, lineage, filename, sig.md5sum()[:8], sig))
+        print_results(
+            f"{p:5} {count:>5} {lineage} {filename}:{sig.md5sum()[:8]} {sig}"
+        )

-def output_csv(lineage_counts, total_counts, csv_fp, filename, sig,
-               write_header=True):
+
+def output_csv(lineage_counts, total_counts, csv_fp, filename, sig, write_header=True):
     """\
     Output results in CSV.
""" w = csv.writer(csv_fp) if write_header: - headers = ['count'] + list(lca_utils.taxlist()) - headers += ['filename', 'sig_name', 'sig_md5', 'total_counts'] + headers = ["count"] + list(lca_utils.taxlist()) + headers += ["filename", "sig_name", "sig_md5", "total_counts"] w.writerow(headers) - for (lineage, count) in lineage_counts.items(): - debug('lineage:', lineage) + for lineage, count in lineage_counts.items(): + debug("lineage:", lineage) row = [count] + lca_utils.zip_lineage(lineage, truncate_empty=False) row += [filename, sig.name, sig.md5sum(), total_counts] w.writerow(row) @@ -140,7 +143,7 @@ def summarize_main(args): main summarization function. """ if not args.db: - error('Error! must specify at least one LCA database with --db') + error("Error! must specify at least one LCA database with --db") sys.exit(-1) set_quiet(args.quiet, args.debug) @@ -160,10 +163,12 @@ def summarize_main(args): # load all the databases dblist, ksize, scaled = lca_utils.load_databases(args.db, args.scaled) if ignore_abundance: - notify("Ignoring any k-mer abundances in query, since --ignore-abundance given.") + notify( + "Ignoring any k-mer abundances in query, since --ignore-abundance given." + ) # find all the queries - notify('finding query signatures...') + notify("finding query signatures...") inp_files = args.query if args.query_from_file: @@ -171,7 +176,7 @@ def summarize_main(args): inp_files.extend(more_files) if not inp_files: - error('Error! must specify at least one query signature with --query') + error("Error! must specify at least one query signature with --query") sys.exit(-1) if not check_files_exist(*inp_files): @@ -181,31 +186,37 @@ def summarize_main(args): csv_fp = None write_header = True if args.output: - csv_fp = open(args.output, 'w', newline='') + csv_fp = open(args.output, "w", newline="") try: - for filename, sig, hashvals in \ - load_singletons_and_count(inp_files, ksize, scaled, ignore_abundance): - + for filename, sig, hashvals in load_singletons_and_count( + inp_files, ksize, scaled, ignore_abundance + ): # get the full counted list of lineage counts in this signature - lineage_counts = summarize(hashvals, dblist, args.threshold, - ignore_abundance) + lineage_counts = summarize( + hashvals, dblist, args.threshold, ignore_abundance + ) if not ignore_abundance: total = float(sum(hashvals.values())) else: total = float(len(hashvals)) - output_results(lineage_counts, total, - filename=filename, sig=sig) + output_results(lineage_counts, total, filename=filename, sig=sig) if csv_fp: - output_csv(lineage_counts, total, csv_fp, filename, sig, - write_header=write_header) + output_csv( + lineage_counts, + total, + csv_fp, + filename, + sig, + write_header=write_header, + ) write_header = False finally: if csv_fp: csv_fp.close() -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(summarize_main(sys.argv[1:])) diff --git a/src/sourmash/lca/lca_db.py b/src/sourmash/lca/lca_db.py index daabe3cb70..78855c71b8 100644 --- a/src/sourmash/lca/lca_db.py +++ b/src/sourmash/lca/lca_db.py @@ -14,6 +14,7 @@ def cached_property(fun): """A memoize decorator for class properties.""" + @functools.wraps(fun) def get(self): try: @@ -24,6 +25,7 @@ def get(self): pass ret = self._cache[fun] = fun(self) return ret + return property(get) @@ -56,13 +58,14 @@ class LCA_Database(Index): `_hashval_to_idx` is a dictionary from individual hash values to sets of `idx`. """ + is_database = True # we set manifest to None to avoid implication of fast on-disk access to # sketches. 
This may be revisited later. manifest = None - def __init__(self, ksize, scaled, moltype='DNA'): + def __init__(self, ksize, scaled, moltype="DNA"): self.ksize = int(ksize) self.scaled = int(scaled) self.filename = None @@ -98,7 +101,7 @@ def _invalidate_cache(self): Internal method. """ - if hasattr(self, '_cache'): + if hasattr(self, "_cache"): del self._cache def _get_ident_index(self, ident, fail_on_duplicate=False): @@ -108,7 +111,7 @@ def _get_ident_index(self, ident, fail_on_duplicate=False): """ idx = self._ident_to_idx.get(ident) if fail_on_duplicate: - assert idx is None # should be no duplicate identities + assert idx is None # should be no duplicate identities if idx is None: idx = self._next_index @@ -153,10 +156,18 @@ def insert(self, sig, ident=None, lineage=None): minhash = sig.minhash if minhash.ksize != self.ksize: - raise ValueError("cannot insert signature with ksize {} into DB (ksize {})".format(minhash.ksize, self.ksize)) + raise ValueError( + "cannot insert signature with ksize {} into DB (ksize {})".format( + minhash.ksize, self.ksize + ) + ) if minhash.moltype != self.moltype: - raise ValueError("cannot insert signature with moltype {} into DB (moltype {})".format(minhash.moltype, self.moltype)) + raise ValueError( + "cannot insert signature with moltype {} into DB (moltype {})".format( + minhash.moltype, self.moltype + ) + ) # downsample to specified scaled; this has the side effect of # making sure they're all at the same scaled value! @@ -169,7 +180,7 @@ def insert(self, sig, ident=None, lineage=None): ident = str(sig) if ident in self._ident_to_name: - raise ValueError("signature '{}' is already in this LCA db.".format(ident)) + raise ValueError(f"signature '{ident}' is already in this LCA db.") # before adding, invalide any caching from @cached_property self._invalidate_cache() @@ -189,7 +200,7 @@ def insert(self, sig, ident=None, lineage=None): # map idx to lid as well. self._idx_to_lid[idx] = lid except TypeError: - raise ValueError('lineage cannot be used as a key?!') + raise ValueError("lineage cannot be used as a key?!") for hashval in minhash.hashes: self._hashval_to_idx[hashval].add(idx) @@ -197,7 +208,7 @@ def insert(self, sig, ident=None, lineage=None): return len(minhash) def __repr__(self): - return "LCA_Database('{}')".format(self.filename) + return f"LCA_Database('{self.filename}')" def signatures(self): """Return all of the signatures in this LCA database. @@ -224,8 +235,16 @@ def _signatures_with_internal(self): for idx, ss in self._signatures.items(): yield ss, idx - def select(self, ksize=None, moltype=None, num=0, scaled=0, abund=None, - containment=False, picklist=None): + def select( + self, + ksize=None, + moltype=None, + num=0, + scaled=0, + abund=None, + containment=False, + picklist=None, + ): """Select a subset of signatures to search. 
As with SBTs, queries with higher scaled values than the database @@ -239,12 +258,18 @@ def select(self, ksize=None, moltype=None, num=0, scaled=0, abund=None, raise ValueError("cannot use 'num' MinHashes to search LCA database") if scaled > self.scaled and not containment: - raise ValueError(f"cannot use scaled={scaled} on this database (scaled={self.scaled})") + raise ValueError( + f"cannot use scaled={scaled} on this database (scaled={self.scaled})" + ) if ksize is not None and self.ksize != ksize: - raise ValueError(f"ksize on this database is {self.ksize}; this is different from requested ksize of {ksize}") + raise ValueError( + f"ksize on this database is {self.ksize}; this is different from requested ksize of {ksize}" + ) if moltype is not None and moltype != self.moltype: - raise ValueError(f"moltype on this database is {self.moltype}; this is different from requested moltype of {moltype}") + raise ValueError( + f"moltype on this database is {self.moltype}; this is different from requested moltype of {moltype}" + ) if abund: raise ValueError("LCA databases do not support sketches with abund=True") @@ -252,7 +277,9 @@ def select(self, ksize=None, moltype=None, num=0, scaled=0, abund=None, if picklist is not None: self.picklists.append(picklist) if len(self.picklists) > 1: - raise ValueError("we do not (yet) support multiple picklists for LCA databases") + raise ValueError( + "we do not (yet) support multiple picklists for LCA databases" + ) return self @@ -266,24 +293,27 @@ def load(cls, db_name): from sourmash.tax.tax_utils import LineagePair if not os.path.isfile(db_name): - raise ValueError(f"'{db_name}' is not a file and cannot be loaded as an LCA database") + raise ValueError( + f"'{db_name}' is not a file and cannot be loaded as an LCA database" + ) try: from sourmash.index.sqlite_index import LCA_SqliteDatabase + return LCA_SqliteDatabase.load(db_name) except ValueError: pass xopen = open - if db_name.endswith('.gz'): + if db_name.endswith(".gz"): xopen = gzip.open - with xopen(db_name, 'rt') as fp: + with xopen(db_name, "rt") as fp: try: first_ch = fp.read(1) except ValueError: - first_ch = 'X' - if not first_ch or first_ch[0] != '{': + first_ch = "X" + if not first_ch or first_ch[0] != "{": raise ValueError(f"'{db_name}' is not an LCA database file.") fp.seek(0) @@ -295,41 +325,45 @@ def load(cls, db_name): pass if not load_d: - raise ValueError("cannot parse database file '{}' as JSON; invalid format.") + raise ValueError( + "cannot parse database file '{}' as JSON; invalid format." + ) version = None db_type = None try: - version = load_d.get('version') - db_type = load_d.get('type') + version = load_d.get("version") + db_type = load_d.get("type") except AttributeError: pass - if db_type != 'sourmash_lca': - raise ValueError("database file '{}' is not an LCA db.".format(db_name)) + if db_type != "sourmash_lca": + raise ValueError(f"database file '{db_name}' is not an LCA db.") version = float(version) - if version < 2.0 or 'lid_to_lineage' not in load_d: - raise ValueError("Error! This is an old-style LCA DB. You'll need to rebuild or download a newer one.") - - ksize = int(load_d['ksize']) - scaled = int(load_d['scaled']) - moltype = load_d.get('moltype', 'DNA') - if moltype != 'DNA': + if version < 2.0 or "lid_to_lineage" not in load_d: + raise ValueError( + "Error! This is an old-style LCA DB. You'll need to rebuild or download a newer one." 
+ ) + + ksize = int(load_d["ksize"]) + scaled = int(load_d["scaled"]) + moltype = load_d.get("moltype", "DNA") + if moltype != "DNA": assert ksize % 3 == 0 ksize = int(ksize / 3) db = cls(ksize, scaled, moltype) # convert lineage_dict to proper lineages (tuples of LineagePairs) - lid_to_lineage_2 = load_d['lid_to_lineage'] + lid_to_lineage_2 = load_d["lid_to_lineage"] lid_to_lineage = {} lineage_to_lid = {} for k, v in lid_to_lineage_2.items(): - v = dict( ((x[0], x[1]) for x in v) ) + v = dict((x[0], x[1]) for x in v) vv = [] for rank in taxlist(): - name = v.get(rank, '') + name = v.get(rank, "") vv.append(LineagePair(rank, name)) vv = tuple(vv) @@ -340,18 +374,18 @@ def load(cls, db_name): # convert hashval -> lineage index keys to integers (looks like # JSON doesn't have a 64 bit type so stores them as strings) - hashval_to_idx_2 = load_d['hashval_to_idx'] + hashval_to_idx_2 = load_d["hashval_to_idx"] hashval_to_idx = {} for k, v in hashval_to_idx_2.items(): hashval_to_idx[int(k)] = v db._hashval_to_idx = hashval_to_idx - db._ident_to_name = load_d['ident_to_name'] - db._ident_to_idx = load_d['ident_to_idx'] + db._ident_to_name = load_d["ident_to_name"] + db._ident_to_idx = load_d["ident_to_idx"] db._idx_to_lid = {} - for k, v in load_d['idx_to_lid'].items(): + for k, v in load_d["idx_to_lid"].items(): db._idx_to_lid[int(k)] = v if db._ident_to_idx: @@ -367,11 +401,11 @@ def load(cls, db_name): return db - def save(self, db_name, *, format='json'): - if format == 'sql': + def save(self, db_name, *, format="json"): + if format == "sql": self.save_to_sql(db_name) else: - assert format == 'json' + assert format == "json" self.save_to_json(db_name) def save_to_json(self, db_name): @@ -380,42 +414,45 @@ def save_to_json(self, db_name): Method specific to this class. 
""" if os.path.exists(db_name): - raise ValueError(f"LCA database {db_name} already exists; not overwriting or appending") + raise ValueError( + f"LCA database {db_name} already exists; not overwriting or appending" + ) xopen = open - if db_name.endswith('.gz'): + if db_name.endswith(".gz"): xopen = gzip.open - with xopen(db_name, 'wt') as fp: + with xopen(db_name, "wt") as fp: # use an OrderedDict to preserve output order save_d = OrderedDict() - save_d['version'] = '2.1' - save_d['type'] = 'sourmash_lca' - save_d['license'] = 'CC0' + save_d["version"] = "2.1" + save_d["type"] = "sourmash_lca" + save_d["license"] = "CC0" - if self.moltype != 'DNA': - ksize = self.ksize*3 + if self.moltype != "DNA": + ksize = self.ksize * 3 else: ksize = self.ksize - save_d['ksize'] = ksize - save_d['scaled'] = self.scaled - save_d['moltype'] = self.moltype + save_d["ksize"] = ksize + save_d["scaled"] = self.scaled + save_d["moltype"] = self.moltype # convert lineage internals from tuples to dictionaries d = OrderedDict() for k, v in self._lid_to_lineage.items(): - d[k] = dict([ (vv.rank, vv.name) for vv in v ]) - save_d['lid_to_lineage'] = d + d[k] = dict([(vv.rank, vv.name) for vv in v]) + save_d["lid_to_lineage"] = d # convert values from sets to lists, so that JSON knows how to save - save_d['hashval_to_idx'] = \ - dict((k, list(v)) for (k, v) in self._hashval_to_idx.items()) - - save_d['ident_to_name'] = self._ident_to_name - save_d['ident_to_idx'] = self._ident_to_idx - save_d['idx_to_lid'] = self._idx_to_lid - save_d['lid_to_lineage'] = self._lid_to_lineage - + save_d["hashval_to_idx"] = dict( + (k, list(v)) for (k, v) in self._hashval_to_idx.items() + ) + + save_d["ident_to_name"] = self._ident_to_name + save_d["ident_to_idx"] = self._ident_to_idx + save_d["idx_to_lid"] = self._idx_to_lid + save_d["lid_to_lineage"] = self._lid_to_lineage + json.dump(save_d, fp) def save_to_sql(self, dbname): @@ -424,11 +461,13 @@ def save_to_sql(self, dbname): from sourmash.tax.tax_utils import LineageDB if os.path.exists(dbname): - raise ValueError(f"LCA database {dbname} already exists; not overwriting or appending") + raise ValueError( + f"LCA database {dbname} already exists; not overwriting or appending" + ) # create a new in-memory lineage db... 
assignments = {} - available_ranks = set() # track ranks, too + available_ranks = set() # track ranks, too for ident, idx in self._ident_to_idx.items(): lid = self._idx_to_lid.get(idx) if lid is not None: @@ -454,7 +493,7 @@ def downsample_scaled(self, scaled): if scaled == self.scaled: return elif scaled < self.scaled: - raise ValueError("cannot decrease scaled from {} to {}".format(self.scaled, scaled)) + raise ValueError(f"cannot decrease scaled from {self.scaled} to {scaled}") self._invalidate_cache() @@ -513,22 +552,28 @@ def _signatures(self): is_protein = False is_hp = False is_dayhoff = False - if self.moltype == 'protein': + if self.moltype == "protein": is_protein = True - elif self.moltype == 'hp': + elif self.moltype == "hp": is_hp = True - elif self.moltype == 'dayhoff': + elif self.moltype == "dayhoff": is_dayhoff = True - minhash = MinHash(n=0, ksize=self.ksize, scaled=self.scaled, - is_protein=is_protein, hp=is_hp, dayhoff=is_dayhoff) + minhash = MinHash( + n=0, + ksize=self.ksize, + scaled=self.scaled, + is_protein=is_protein, + hp=is_hp, + dayhoff=is_dayhoff, + ) - debug('creating signatures for LCA DB...') + debug("creating signatures for LCA DB...") mhd = defaultdict(minhash.copy_and_clear) temp_vals = defaultdict(list) # invert the hashval_to_idx dictionary - for (hashval, idlist) in self._hashval_to_idx.items(): + for hashval, idlist in self._hashval_to_idx.items(): for idx in idlist: temp_hashes = temp_vals[idx] temp_hashes.append(hashval) @@ -559,7 +604,7 @@ def _signatures(self): if passes_all_picklists(ss, self.picklists): sigd[idx] = ss - debug('=> {} signatures!', len(sigd)) + debug("=> {} signatures!", len(sigd)) return sigd def find(self, search_fn, query, **kwargs): @@ -582,9 +627,13 @@ def find(self, search_fn, query, **kwargs): if self.scaled > query_scaled: query_mh = query_mh.downsample(scaled=self.scaled) query_scaled = query_mh.scaled - prepare_subject = lambda x: x # identity + + def prepare_subject(x): + return x # identity else: - prepare_subject = lambda subj: subj.downsample(scaled=query_scaled) + + def prepare_subject(subj): + return subj.downsample(scaled=query_scaled) # collect matching hashes for the query: c = Counter() @@ -594,7 +643,7 @@ def find(self, search_fn, query, **kwargs): for idx in idx_list: c[idx] += 1 - debug('number of matching signatures for hashes: {}', len(c)) + debug("number of matching signatures for hashes: {}", len(c)) # for each match, in order of largest overlap, for idx, count in c.most_common(): @@ -604,7 +653,7 @@ def find(self, search_fn, query, **kwargs): # this piecemeal by iterating across all the hashes, instead. subj = self._signatures.get(idx) - if subj is None: # must be because of a picklist exclusion + if subj is None: # must be because of a picklist exclusion assert self.picklists continue @@ -616,8 +665,7 @@ def find(self, search_fn, query, **kwargs): shared_size = query_mh.count_common(subj_mh) total_size = len(query_mh + subj_mh) - score = search_fn.score_fn(query_size, shared_size, subj_size, - total_size) + score = search_fn.score_fn(query_size, shared_size, subj_size, total_size) # CTB note to self: even with JaccardSearchBestOnly, this will # still iterate over & score all signatures. We should come @@ -671,14 +719,14 @@ def load_databases(filenames, scaled=None, verbose=True): # load all the databases for db_name in filenames: if verbose: - notify(u'\r\033[K', end=u'') - notify(f'... loading database {format(db_name)}', end='\r') + notify("\r\033[K", end="") + notify(f"... 
loading database {format(db_name)}", end="\r") lca_db = LCA_Database.load(db_name) ksize_vals.add(lca_db.ksize) if len(ksize_vals) > 1: - raise Exception('multiple ksizes, quitting') + raise Exception("multiple ksizes, quitting") if scaled and scaled > lca_db.scaled: lca_db.downsample_scaled(scaled) @@ -686,7 +734,7 @@ def load_databases(filenames, scaled=None, verbose=True): moltype_vals.add(lca_db.moltype) if len(moltype_vals) > 1: - raise Exception('multiple moltypes, quitting') + raise Exception("multiple moltypes, quitting") dblist.append(lca_db) @@ -695,7 +743,9 @@ def load_databases(filenames, scaled=None, verbose=True): moltype = moltype_vals.pop() if verbose: - notify(u'\r\033[K', end=u'') - notify(f'loaded {len(dblist)} LCA databases. ksize={ksize}, scaled={scaled} moltype={moltype}') + notify("\r\033[K", end="") + notify( + f"loaded {len(dblist)} LCA databases. ksize={ksize}, scaled={scaled} moltype={moltype}" + ) return dblist, ksize, scaled diff --git a/src/sourmash/lca/lca_utils.py b/src/sourmash/lca/lca_utils.py index 8ee9340ed7..70b883bb7d 100644 --- a/src/sourmash/lca/lca_utils.py +++ b/src/sourmash/lca/lca_utils.py @@ -7,12 +7,23 @@ from .lca_db import LCA_Database, load_single_database, load_databases -__all__ = ['taxlist', 'zip_lineage', 'build_tree', 'find_lca', - 'load_single_database', 'load_databases', 'gather_assignments', - 'count_lca_for_assignments', 'LineagePair', 'display_lineage', - 'make_lineage', 'pop_to_rank', 'is_lineage_match'] - -try: # py2/py3 compat +__all__ = [ + "taxlist", + "zip_lineage", + "build_tree", + "find_lca", + "load_single_database", + "load_databases", + "gather_assignments", + "count_lca_for_assignments", + "LineagePair", + "display_lineage", + "make_lineage", + "pop_to_rank", + "is_lineage_match", +] + +try: # py2/py3 compat from itertools import zip_longest except ImportError: from itertools import izip_longest as zip_longest @@ -20,7 +31,7 @@ from sourmash.logging import notify, error, debug # type to store an element in a taxonomic lineage -LineagePair = namedtuple('LineagePair', ['rank', 'name']) +LineagePair = namedtuple("LineagePair", ["rank", "name"]) def check_files_exist(*files): @@ -32,8 +43,12 @@ def check_files_exist(*files): ret = False if len(not_found): - error('Error! Could not find the following files.' - ' Make sure the file paths are specified correctly.\n{}'.format('\n'.join(not_found))) + error( + "Error! Could not find the following files." + " Make sure the file paths are specified correctly.\n{}".format( + "\n".join(not_found) + ) + ) return ret @@ -43,11 +58,17 @@ def taxlist(include_strain=True): """ Provide an ordered list of taxonomic ranks. 
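    For example (illustrative doctest):

    >>> list(taxlist(include_strain=False))[:3]
    ['superkingdom', 'phylum', 'class']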
""" - for k in ['superkingdom', 'phylum', 'class', 'order', 'family', 'genus', - 'species']: - yield k + yield from [ + "superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + ] if include_strain: - yield 'strain' + yield "strain" # produce an ordered list of tax names from lineage @@ -67,10 +88,11 @@ def zip_lineage(lineage, include_strain=True, truncate_empty=False): ['a', '', 'c', '', '', '', '', ''] """ - empty = LineagePair(None, '') + empty = LineagePair(None, "") - pairs = zip_longest(taxlist(include_strain=include_strain), - lineage, fillvalue=empty) + pairs = zip_longest( + taxlist(include_strain=include_strain), lineage, fillvalue=empty + ) pairs = list(pairs) # eliminate empty if so requested @@ -85,22 +107,30 @@ def zip_lineage(lineage, include_strain=True, truncate_empty=False): for taxrank, lineage_tup in pairs: # validate non-empty tax, e.g. superkingdom/phylum/class in order. if lineage_tup != empty and lineage_tup.rank != taxrank: - raise ValueError('incomplete lineage at {} - is {} instead'.format(taxrank, lineage_tup.rank)) + raise ValueError( + f"incomplete lineage at {taxrank} - is {lineage_tup.rank} instead" + ) row.append(lineage_tup.name) return row def display_lineage(lineage, include_strain=True, truncate_empty=True): - return ";".join(zip_lineage(lineage, - include_strain=include_strain, - truncate_empty=truncate_empty)) + return ";".join( + zip_lineage( + lineage, include_strain=include_strain, truncate_empty=truncate_empty + ) + ) # filter function toreplace blank/na/null with 'unassigned' -filter_null = lambda x: 'unassigned' if x is None or x.strip() in \ - ('[Blank]', 'na', 'null', '') else x -null_names = set(['[Blank]', 'na', 'null']) +def filter_null(x): + return ( + "unassigned" if x is None or x.strip() in ("[Blank]", "na", "null", "") else x + ) + + +null_names = set(["[Blank]", "na", "null"]) def build_tree(assignments, initial=None): @@ -142,13 +172,13 @@ def find_lca(tree): node = tree lineage = [] while 1: - if len(node) == 1: # descend to only child; track path + if len(node) == 1: # descend to only child; track path lineage_tup = next(iter(node.keys())) lineage.append(lineage_tup) node = node[lineage_tup] - elif len(node) == 0: # at leaf; end + elif len(node) == 0: # at leaf; end return tuple(lineage), 0 - else: # len(node) > 1 => confusion!! + else: # len(node) > 1 => confusion!! return tuple(lineage), len(node) @@ -231,14 +261,14 @@ def pop_to_rank(lin, rank): return tuple(lin) - def make_lineage(lineage): "Turn a ; or ,-separated set of lineages into a tuple of LineagePair objs." 
from sourmash.tax.tax_utils import LineagePair - lin = lineage.split(';') + + lin = lineage.split(";") if len(lin) == 1: - lin = lineage.split(',') - lin = [ LineagePair(rank, n) for (rank, n) in zip(taxlist(), lin) ] + lin = lineage.split(",") + lin = [LineagePair(rank, n) for (rank, n) in zip(taxlist(), lin)] lin = tuple(lin) return lin diff --git a/src/sourmash/logging.py b/src/sourmash/logging.py index 2915c43f78..ad885a7aee 100644 --- a/src/sourmash/logging.py +++ b/src/sourmash/logging.py @@ -3,6 +3,8 @@ _quiet = False _debug = False + + def set_quiet(val, print_debug=False): global _quiet, _debug _quiet = bool(val) @@ -22,10 +24,9 @@ def notify(s, *args, **kwargs): if _quiet: return - print(u'\r\033[K', end=u'', file=sys.stderr) - print(s.format(*args, **kwargs), file=sys.stderr, - end=kwargs.get('end', u'\n')) - if kwargs.get('flush'): + print("\r\033[K", end="", file=sys.stderr) + print(s.format(*args, **kwargs), file=sys.stderr, end=kwargs.get("end", "\n")) + if kwargs.get("flush"): sys.stderr.flush() @@ -34,10 +35,9 @@ def debug(s, *args, **kwargs): if _quiet or not _debug: return - print(u'\r\033[K', end=u'', file=sys.stderr) - print(s.format(*args, **kwargs), file=sys.stderr, - end=kwargs.get('end', u'\n')) - if kwargs.get('flush'): + print("\r\033[K", end="", file=sys.stderr) + print(s.format(*args, **kwargs), file=sys.stderr, end=kwargs.get("end", "\n")) + if kwargs.get("flush"): sys.stderr.flush() @@ -46,17 +46,17 @@ def debug_literal(s, *args, **kwargs): if _quiet or not _debug: return - print(u'\r\033[K', end=u'', file=sys.stderr) - print(s, file=sys.stderr, end=kwargs.get('end', u'\n')) - if kwargs.get('flush'): + print("\r\033[K", end="", file=sys.stderr) + print(s, file=sys.stderr, end=kwargs.get("end", "\n")) + if kwargs.get("flush"): sys.stderr.flush() def error(s, *args, **kwargs): "A simple error logging function => stderr." 
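    # Usage sketch: positional args are interpolated str.format-style,
    # e.g. error("missing {} of {}", 3, 10) writes "missing 3 of 10" to
    # stderr after clearing the current terminal line.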
- print(u'\r\033[K', end=u'', file=sys.stderr) + print("\r\033[K", end="", file=sys.stderr) print(s.format(*args, **kwargs), file=sys.stderr) - if kwargs.get('flush'): + if kwargs.get("flush"): sys.stderr.flush() @@ -67,13 +67,13 @@ def test_notify(): saveerr, sys.stderr = sys.stderr, StringIO() try: _quiet = False - notify(u'hello, world') + notify("hello, world") finally: _quiet = qsave saveerr, sys.stderr = sys.stderr, saveerr print(type(saveerr)) - assert 'hello, world\n' in saveerr.getvalue() + assert "hello, world\n" in saveerr.getvalue() def test_notify_flush(): @@ -83,13 +83,13 @@ def test_notify_flush(): saveerr, sys.stderr = sys.stderr, StringIO() try: _quiet = False - notify(u'hello, world', flush=True) + notify("hello, world", flush=True) finally: _quiet = qsave saveerr, sys.stderr = sys.stderr, saveerr print(type(saveerr)) - assert 'hello, world' in saveerr.getvalue() + assert "hello, world" in saveerr.getvalue() def test_notify_end(): @@ -99,13 +99,13 @@ def test_notify_end(): saveerr, sys.stderr = sys.stderr, StringIO() try: _quiet = False - notify(u'hello, world', end=u'FOO') + notify("hello, world", end="FOO") finally: _quiet = qsave saveerr, sys.stderr = sys.stderr, saveerr print(type(saveerr)) - assert 'hello, worldFOO' in saveerr.getvalue() + assert "hello, worldFOO" in saveerr.getvalue() def test_notify_quiet(): @@ -115,13 +115,13 @@ def test_notify_quiet(): saveerr, sys.stderr = sys.stderr, StringIO() try: _quiet = True - notify(u'hello, world') + notify("hello, world") finally: _quiet = qsave saveerr, sys.stderr = sys.stderr, saveerr print(type(saveerr)) - assert 'hello, world' not in saveerr.getvalue() + assert "hello, world" not in saveerr.getvalue() def test_error(): @@ -131,13 +131,13 @@ def test_error(): saveerr, sys.stderr = sys.stderr, StringIO() try: _quiet = False - error(u'hello, world') + error("hello, world") finally: _quiet = qsave saveerr, sys.stderr = sys.stderr, saveerr print(type(saveerr)) - assert 'hello, world\n' in saveerr.getvalue() + assert "hello, world\n" in saveerr.getvalue() def test_error_flush(): @@ -147,13 +147,13 @@ def test_error_flush(): saveerr, sys.stderr = sys.stderr, StringIO() try: _quiet = False - error(u'hello, world', flush=True) + error("hello, world", flush=True) finally: _quiet = qsave saveerr, sys.stderr = sys.stderr, saveerr print(type(saveerr)) - assert 'hello, world' in saveerr.getvalue() + assert "hello, world" in saveerr.getvalue() def test_error_quiet(): @@ -164,10 +164,10 @@ def test_error_quiet(): saveerr, sys.stderr = sys.stderr, StringIO() try: _quiet = True - error(u'hello, world') + error("hello, world") finally: _quiet = qsave saveerr, sys.stderr = sys.stderr, saveerr print(type(saveerr)) - assert 'hello, world' in saveerr.getvalue() + assert "hello, world" in saveerr.getvalue() diff --git a/src/sourmash/manifest.py b/src/sourmash/manifest.py index 466bfa8e7a..2f00f5c382 100644 --- a/src/sourmash/manifest.py +++ b/src/sourmash/manifest.py @@ -23,11 +23,21 @@ class BaseCollectionManifest: * 'locations()' returns all distinct locations for e.g. lazy loading * supports container protocol for signatures, e.g. 'if ss in manifest: ...' """ + # each manifest row must have the following, although they may be empty. 
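    # A hypothetical row, for orientation (values illustrative only):
    #   {"internal_location": "sigs.zip", "md5": "09a08691...",
    #    "md5short": "09a08691", "ksize": 31, "moltype": "DNA", "num": 0,
    #    "scaled": 1000, "n_hashes": 500, "with_abundance": 0,
    #    "name": "example genome", "filename": ""}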
- required_keys = ('internal_location', - 'md5', 'md5short', 'ksize', 'moltype', 'num', - 'scaled', 'n_hashes', 'with_abundance', - 'name', 'filename') + required_keys = ( + "internal_location", + "md5", + "md5short", + "ksize", + "moltype", + "num", + "scaled", + "n_hashes", + "with_abundance", + "name", + "filename", + ) @classmethod @abstractmethod @@ -42,12 +52,12 @@ def load_from_filename(cls, filename): return db # not a SQLite db? CTB: fix this to actually try loading this as .gz... - if filename.endswith('.gz'): + if filename.endswith(".gz"): xopen = gzip.open else: xopen = open - with xopen(filename, 'rt', newline="") as fp: + with xopen(filename, "rt", newline="") as fp: return cls.load_from_csv(fp) @classmethod @@ -55,10 +65,10 @@ def load_from_csv(cls, fp): "load a manifest from a CSV file." manifest_list = [] firstline = fp.readline().rstrip() - if not firstline.startswith('# SOURMASH-MANIFEST-VERSION: '): + if not firstline.startswith("# SOURMASH-MANIFEST-VERSION: "): raise ValueError("manifest is missing version header") - version = firstline[len('# SOURMASH-MANIFEST-VERSION: '):] + version = firstline[len("# SOURMASH-MANIFEST-VERSION: ") :] if float(version) != 1.0: raise ValueError(f"unknown manifest version number {version}") @@ -73,15 +83,15 @@ def load_from_csv(cls, fp): row = None # do row type conversion - introws = ('num', 'scaled', 'ksize', 'n_hashes') - boolrows = ('with_abundance',) + introws = ("num", "scaled", "ksize", "n_hashes") + boolrows = ("with_abundance",) for row in r: for k in introws: row[k] = int(row[k]) for k in boolrows: row[k] = bool(ast.literal_eval(str(row[k]))) - row['signature'] = None + row["signature"] = None manifest_list.append(row) return CollectionManifest(manifest_list) @@ -89,69 +99,71 @@ def load_from_csv(cls, fp): @classmethod def load_from_sql(cls, filename): from sourmash.index.sqlite_index import load_sqlite_index + db = load_sqlite_index(filename, request_manifest=True) if db is not None: return db.manifest return None - def write_to_filename(self, filename, *, database_format='csv', - ok_if_exists=False): - if database_format == 'csv': + def write_to_filename(self, filename, *, database_format="csv", ok_if_exists=False): + if database_format == "csv": from .sourmash_args import FileOutputCSV + if ok_if_exists or not os.path.exists(filename): with FileOutputCSV(filename) as fp: return self.write_to_csv(fp, write_header=True) elif os.path.exists(filename) and not ok_if_exists: raise Exception("output manifest already exists") - elif database_format == 'sql': + elif database_format == "sql": from sourmash.index.sqlite_index import SqliteCollectionManifest - SqliteCollectionManifest.load_from_manifest(self, dbfile=filename, - append=ok_if_exists) + + SqliteCollectionManifest.load_from_manifest( + self, dbfile=filename, append=ok_if_exists + ) @classmethod def write_csv_header(cls, fp): "write header for manifest CSV format" - fp.write('# SOURMASH-MANIFEST-VERSION: 1.0\n') + fp.write("# SOURMASH-MANIFEST-VERSION: 1.0\n") w = csv.DictWriter(fp, fieldnames=cls.required_keys) w.writeheader() def write_to_csv(self, fp, write_header=False): "write manifest CSV to specified file handle" - w = csv.DictWriter(fp, fieldnames=self.required_keys, - extrasaction='ignore') + w = csv.DictWriter(fp, fieldnames=self.required_keys, extrasaction="ignore") if write_header: self.write_csv_header(fp) for row in self.rows: # don't write signature! 
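            # ('signature' is an in-memory convenience key added by
            # make_manifest_row; dropping it here means only the declared
            # required_keys columns reach the CSV.)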
- if 'signature' in row: - del row['signature'] + if "signature" in row: + del row["signature"] w.writerow(row) @classmethod def make_manifest_row(cls, ss, location, *, include_signature=True): "make a manifest row dictionary." row = {} - row['md5'] = ss.md5sum() - row['md5short'] = row['md5'][:8] - row['ksize'] = ss.minhash.ksize - row['moltype'] = ss.minhash.moltype - row['num'] = ss.minhash.num - row['scaled'] = ss.minhash.scaled - row['n_hashes'] = len(ss.minhash) - row['with_abundance'] = 1 if ss.minhash.track_abundance else 0 - row['name'] = ss.name - row['filename'] = ss.filename - row['internal_location'] = location + row["md5"] = ss.md5sum() + row["md5short"] = row["md5"][:8] + row["ksize"] = ss.minhash.ksize + row["moltype"] = ss.minhash.moltype + row["num"] = ss.minhash.num + row["scaled"] = ss.minhash.scaled + row["n_hashes"] = len(ss.minhash) + row["with_abundance"] = 1 if ss.minhash.track_abundance else 0 + row["name"] = ss.name + row["filename"] = ss.filename + row["internal_location"] = location assert set(row.keys()) == set(cls.required_keys) # if requested, include the signature in the manifest. if include_signature: - row['signature'] = ss + row["signature"] = ss return row @classmethod @@ -164,8 +176,9 @@ def create_manifest(cls, locations_iter, *, include_signature=True): """ manifest_list = [] for ss, location in locations_iter: - row = cls.make_manifest_row(ss, location, - include_signature=include_signature) + row = cls.make_manifest_row( + ss, location, include_signature=include_signature + ) manifest_list.append(row) return cls(manifest_list) @@ -216,6 +229,7 @@ class CollectionManifest(BaseCollectionManifest): """ An in-memory manifest that simply stores the rows in a list. """ + def __init__(self, rows=[]): "Initialize from an iterable of metadata dictionaries." self.rows = [] @@ -237,7 +251,7 @@ def _add_rows(self, rows): # only iterate once, in case it's a generator for row in rows: self.rows.append(row) - md5set.add(row['md5']) + md5set.add(row["md5"]) def __iadd__(self, other): if self is other: @@ -258,7 +272,7 @@ def __len__(self): def __eq__(self, other): "Check equality on a row-by-row basis. May fail on out-of-order rows." - for (a, b) in itertools.zip_longest(self.rows, other.rows): + for a, b in itertools.zip_longest(self.rows, other.rows): if a is None or b is None: return False @@ -269,41 +283,49 @@ def __eq__(self, other): return True - def _select(self, *, ksize=None, moltype=None, scaled=0, num=0, - containment=False, abund=None, picklist=None): + def _select( + self, + *, + ksize=None, + moltype=None, + scaled=0, + num=0, + containment=False, + abund=None, + picklist=None, + ): """Yield manifest rows for sigs that match the specified requirements. Internal method; call `select_to_manifest` instead. 
""" matching_rows = self.rows if ksize: - matching_rows = ( row for row in matching_rows - if row['ksize'] == ksize ) + matching_rows = (row for row in matching_rows if row["ksize"] == ksize) if moltype: - matching_rows = ( row for row in matching_rows - if row['moltype'] == moltype ) + matching_rows = (row for row in matching_rows if row["moltype"] == moltype) if scaled or containment: if containment and not scaled: raise ValueError("'containment' requires 'scaled' in Index.select'") - matching_rows = ( row for row in matching_rows - if row['scaled'] and not row['num'] ) + matching_rows = ( + row for row in matching_rows if row["scaled"] and not row["num"] + ) if num: - matching_rows = ( row for row in matching_rows - if row['num'] and not row['scaled'] ) + matching_rows = ( + row for row in matching_rows if row["num"] and not row["scaled"] + ) if abund: # only need to concern ourselves if abundance is _required_ - matching_rows = ( row for row in matching_rows - if row['with_abundance'] ) + matching_rows = (row for row in matching_rows if row["with_abundance"]) if picklist: - matching_rows = ( row for row in matching_rows - if picklist.matches_manifest_row(row) ) + matching_rows = ( + row for row in matching_rows if picklist.matches_manifest_row(row) + ) # return only the internal filenames! - for row in matching_rows: - yield row + yield from matching_rows def select_to_manifest(self, **kwargs): "Do a 'select' and return a new CollectionManifest object." @@ -312,22 +334,24 @@ def select_to_manifest(self, **kwargs): def filter_rows(self, row_filter_fn): "Create a new manifest filtered through row_filter_fn." - new_rows = [ row for row in self.rows if row_filter_fn(row) ] + new_rows = [row for row in self.rows if row_filter_fn(row)] return CollectionManifest(new_rows) def filter_on_columns(self, col_filter_fn, col_names): "Create a new manifest based on column matches." + def row_filter_fn(row): - x = [ row[col] for col in col_names if row[col] is not None ] + x = [row[col] for col in col_names if row[col] is not None] return col_filter_fn(x) + return self.filter_rows(row_filter_fn) def locations(self): "Return all distinct locations." seen = set() for row in self.rows: - loc = row['internal_location'] + loc = row["internal_location"] # track/remove duplicates if loc not in seen: @@ -341,8 +365,8 @@ def __contains__(self, ss): def to_picklist(self): "Convert this manifest to a picklist." - pl = picklist.SignaturePicklist('manifest') + pl = picklist.SignaturePicklist("manifest") - pl.pickset = { pl._get_value_for_manifest_row(row) for row in self.rows } + pl.pickset = {pl._get_value_for_manifest_row(row) for row in self.rows} return pl diff --git a/src/sourmash/minhash.py b/src/sourmash/minhash.py index 360ca6165b..ffa879b64d 100644 --- a/src/sourmash/minhash.py +++ b/src/sourmash/minhash.py @@ -1,22 +1,26 @@ -# -*- coding: utf-8 -*- """ sourmash submodule that provides MinHash class and utility functions. class MinHash - core MinHash class. class FrozenMinHash - read-only MinHash class. 
""" -from __future__ import unicode_literals, division -from .distance_utils import jaccard_to_distance, containment_to_distance, set_size_exact_prob +from .distance_utils import ( + jaccard_to_distance, + containment_to_distance, + set_size_exact_prob, +) from .logging import notify import numpy as np -__all__ = ['get_minhash_default_seed', - 'get_minhash_max_hash', - 'hash_murmur', - 'MinHash', - 'FrozenMinHash'] +__all__ = [ + "get_minhash_default_seed", + "get_minhash_max_hash", + "hash_murmur", + "MinHash", + "FrozenMinHash", +] from collections.abc import Mapping @@ -52,20 +56,14 @@ def _get_max_hash_for_scaled(scaled): elif scaled == 1: return get_minhash_max_hash() - return min( - int(round(get_minhash_max_hash() / scaled, 0)), - MINHASH_MAX_HASH - ) + return min(int(round(get_minhash_max_hash() / scaled, 0)), MINHASH_MAX_HASH) def _get_scaled_for_max_hash(max_hash): "Convert a 'max_hash' value into a 'scaled' value." if max_hash == 0: return 0 - return min( - int(round(get_minhash_max_hash() / max_hash, 0)), - MINHASH_MAX_HASH - ) + return min(int(round(get_minhash_max_hash() / max_hash, 0)), MINHASH_MAX_HASH) def to_bytes(s): @@ -75,7 +73,7 @@ def to_bytes(s): if isinstance(s, bytes): return s - if not isinstance(s, (str, bytes, int)): + if not isinstance(s, str | bytes | int): raise TypeError("Requires a string-like sequence") if isinstance(s, str): @@ -97,8 +95,7 @@ def hash_murmur(kmer, seed=MINHASH_DEFAULT_SEED): def translate_codon(codon): "Translate a codon into an amino acid." try: - return rustcall(lib.sourmash_translate_codon, - to_bytes(codon)).decode('utf-8') + return rustcall(lib.sourmash_translate_codon, to_bytes(codon)).decode("utf-8") except SourmashError as e: raise ValueError(e.message) @@ -106,7 +103,7 @@ def translate_codon(codon): def flatten_and_downsample_scaled(mh, *scaled_vals): "Flatten MinHash object and downsample to max of scaled values." assert mh.scaled - assert all( (x > 0 for x in scaled_vals) ) + assert all(x > 0 for x in scaled_vals) mh = mh.flatten() scaled = max(scaled_vals) @@ -118,7 +115,7 @@ def flatten_and_downsample_scaled(mh, *scaled_vals): def flatten_and_downsample_num(mh, *num_vals): "Flatten MinHash object and downsample to min of num values." assert mh.num - assert all( (x > 0 for x in num_vals) ) + assert all(x > 0 for x in num_vals) mh = mh.flatten() num = min(num_vals) @@ -138,6 +135,7 @@ def flatten_and_intersect_scaled(mh1, mh2): class _HashesWrapper(Mapping): "A read-only view of the hashes contained by a MinHash object." + def __init__(self, h): self._data = h @@ -186,6 +184,7 @@ class MinHash(RustObject): >>> round(mh1.similarity(mh2), 2) 0.85 """ + __dealloc_func__ = lib.kmerminhash_free def __init__( @@ -236,13 +235,13 @@ def __init__( if dayhoff: hash_function = lib.HASH_FUNCTIONS_MURMUR64_DAYHOFF - ksize = ksize*3 + ksize = ksize * 3 elif hp: hash_function = lib.HASH_FUNCTIONS_MURMUR64_HP - ksize = ksize*3 + ksize = ksize * 3 elif is_protein: hash_function = lib.HASH_FUNCTIONS_MURMUR64_PROTEIN - ksize = ksize*3 + ksize = ksize * 3 else: hash_function = lib.HASH_FUNCTIONS_MURMUR64_DNA @@ -281,7 +280,7 @@ def __getstate__(self): # get a ksize that makes sense to the Rust layer. See #2262. 
return ( self.num, - self.ksize if self.is_dna else self.ksize*3, + self.ksize if self.is_dna else self.ksize * 3, self.is_protein, self.dayhoff, self.hp, @@ -294,16 +293,29 @@ def __getstate__(self): def __setstate__(self, tup): "support pickling via __getstate__/__setstate__" - (n, ksize, is_protein, dayhoff, hp, mins, _, track_abundance, - max_hash, seed) = tup + ( + n, + ksize, + is_protein, + dayhoff, + hp, + mins, + _, + track_abundance, + max_hash, + seed, + ) = tup self.__del__() hash_function = ( - lib.HASH_FUNCTIONS_MURMUR64_DAYHOFF if dayhoff else - lib.HASH_FUNCTIONS_MURMUR64_HP if hp else - lib.HASH_FUNCTIONS_MURMUR64_PROTEIN if is_protein else - lib.HASH_FUNCTIONS_MURMUR64_DNA + lib.HASH_FUNCTIONS_MURMUR64_DAYHOFF + if dayhoff + else lib.HASH_FUNCTIONS_MURMUR64_HP + if hp + else lib.HASH_FUNCTIONS_MURMUR64_PROTEIN + if is_protein + else lib.HASH_FUNCTIONS_MURMUR64_DNA ) scaled = _get_scaled_for_max_hash(max_hash) @@ -335,10 +347,11 @@ def copy_and_clear(self): def add_sequence(self, sequence, force=False): "Add a sequence into the sketch." - self._methodcall(lib.kmerminhash_add_sequence, to_bytes(sequence), - force) + self._methodcall(lib.kmerminhash_add_sequence, to_bytes(sequence), force) - def seq_to_hashes(self, sequence, *, force=False, bad_kmers_as_zeroes=False, is_protein=False): + def seq_to_hashes( + self, sequence, *, force=False, bad_kmers_as_zeroes=False, is_protein=False + ): """Convert sequence to hashes without adding to the sketch. If input sequence is DNA and this is a protein, dayhoff, or hp @@ -354,10 +367,20 @@ def seq_to_hashes(self, sequence, *, force=False, bad_kmers_as_zeroes=False, is_ raise ValueError("cannot add protein sequence to DNA MinHash") if bad_kmers_as_zeroes and not force: - raise ValueError("cannot represent invalid kmers as 0 while force is not set to True") + raise ValueError( + "cannot represent invalid kmers as 0 while force is not set to True" + ) size = ffi.new("uintptr_t *") - hashes_ptr = self._methodcall(lib.kmerminhash_seq_to_hashes, to_bytes(sequence), len(sequence), force, bad_kmers_as_zeroes, is_protein, size) + hashes_ptr = self._methodcall( + lib.kmerminhash_seq_to_hashes, + to_bytes(sequence), + len(sequence), + force, + bad_kmers_as_zeroes, + is_protein, + size, + ) size = size[0] try: @@ -384,21 +407,24 @@ def kmers_and_hashes(self, sequence, *, force=False, is_protein=False): bad_kmers_as_zeroes = True sequence = sequence.upper() - hashvals = self.seq_to_hashes(sequence, - force=force, is_protein=is_protein, - bad_kmers_as_zeroes=bad_kmers_as_zeroes) + hashvals = self.seq_to_hashes( + sequence, + force=force, + is_protein=is_protein, + bad_kmers_as_zeroes=bad_kmers_as_zeroes, + ) if bad_kmers_as_zeroes: - hashvals = [ None if h == 0 else h for h in hashvals ] + hashvals = [None if h == 0 else h for h in hashvals] ksize = self.ksize translate = False - if self.moltype == 'DNA': + if self.moltype == "DNA": pass elif is_protein: pass - else: # translate input DNA sequence => aa - assert self.moltype in ('protein', 'dayhoff', 'hp') + else: # translate input DNA sequence => aa + assert self.moltype in ("protein", "dayhoff", "hp") translate = True ksize = self.ksize * 3 @@ -415,13 +441,13 @@ def kmers_and_hashes(self, sequence, *, force=False, is_protein=False): for frame in (0, 1, 2): # get forward k-mers for start in range(0, len(sequence) - ksize + 1 - frame, 3): - kmer = sequence[start + frame:start + frame + ksize] + kmer = sequence[start + frame : start + frame + ksize] yield kmer, hashvals[hash_i] hash_i += 1 # get rc 
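A hedged usage sketch for the `kmers_and_hashes()` path reflowed above (assumes sourmash is installed): the method pairs each k-mer of the input with its hash, without adding anything to the sketch.

```python
import sourmash

# 9 bp with ksize=5 yields 5 (kmer, hash) pairs for a DNA MinHash.
mh = sourmash.MinHash(n=0, ksize=5, scaled=1)
for kmer, hashval in mh.kmers_and_hashes("ATGCGATGC"):
    print(kmer, hashval)
```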
k-mers for start in range(0, len(seqrc) - ksize + 1 - frame, 3): - kmer = seqrc[start + frame:start + frame + ksize] + kmer = seqrc[start + frame : start + frame + ksize] yield kmer, hashvals[hash_i] hash_i += 1 else: @@ -429,17 +455,17 @@ def kmers_and_hashes(self, sequence, *, force=False, is_protein=False): n_kmers = len(sequence) - ksize + 1 assert n_kmers == len(hashvals) for i, hashval in zip(range(0, n_kmers), hashvals): - kmer = sequence[i:i+ksize] + kmer = sequence[i : i + ksize] yield kmer, hashval def add_kmer(self, kmer): "Add a kmer into the sketch." if self.is_dna: if len(kmer) != self.ksize: - raise ValueError("kmer to add is not {} in length".format(self.ksize)) + raise ValueError(f"kmer to add is not {self.ksize} in length") else: - if len(kmer) != self.ksize*3: - raise ValueError("kmer to add is not {} in length".format(self.ksize*3)) + if len(kmer) != self.ksize * 3: + raise ValueError(f"kmer to add is not {self.ksize * 3} in length") self.add_sequence(kmer) def add_many(self, hashes): @@ -468,9 +494,12 @@ def __len__(self): "Number of hashes." return self._methodcall(lib.kmerminhash_get_mins_size) - @deprecated(deprecated_in="3.5", removed_in="5.0", - current_version=VERSION, - details='Use .hashes property instead.') + @deprecated( + deprecated_in="3.5", + removed_in="5.0", + current_version=VERSION, + details="Use .hashes property instead.", + ) def get_mins(self, with_abundance=False): """Return list of hashes or if ``with_abundance`` a list of (hash, abund). @@ -480,10 +509,12 @@ def get_mins(self, with_abundance=False): return mins.keys() return mins - - @deprecated(deprecated_in="3.5", removed_in="5.0", - current_version=VERSION, - details='Use .hashes property instead.') + @deprecated( + deprecated_in="3.5", + removed_in="5.0", + current_version=VERSION, + details="Use .hashes property instead.", + ) def get_hashes(self): "Return the list of hashes." 
return self.hashes.keys() @@ -500,17 +531,18 @@ def hashes(self): abunds_ptr = self._methodcall(lib.kmerminhash_get_abunds, size_abunds) size_abunds = size_abunds[0] assert size == size_abunds - result = dict(zip(ffi.unpack(mins_ptr, size), ffi.unpack(abunds_ptr, size))) + result = dict( + zip(ffi.unpack(mins_ptr, size), ffi.unpack(abunds_ptr, size)) + ) lib.kmerminhash_slice_free(abunds_ptr, size) return _HashesWrapper(result) else: d = ffi.unpack(mins_ptr, size) - return _HashesWrapper({ k : 1 for k in d }) + return _HashesWrapper({k: 1 for k in d}) finally: lib.kmerminhash_slice_free(mins_ptr, size) - @property def seed(self): return self._methodcall(lib.kmerminhash_seed) @@ -551,9 +583,12 @@ def ksize(self): return k @property - @deprecated(deprecated_in="3.5", removed_in="5.0", - current_version=VERSION, - details='Use scaled instead.') + @deprecated( + deprecated_in="3.5", + removed_in="5.0", + current_version=VERSION, + details="Use scaled instead.", + ) def max_hash(self): return self._methodcall(lib.kmerminhash_max_hash) @@ -574,7 +609,9 @@ def track_abundance(self, b): if b is False: self._methodcall(lib.kmerminhash_disable_abundance) elif len(self) > 0: - raise RuntimeError("Can only set track_abundance=True if the MinHash is empty") + raise RuntimeError( + "Can only set track_abundance=True if the MinHash is empty" + ) else: self._methodcall(lib.kmerminhash_enable_abundance) @@ -604,7 +641,9 @@ def count_common(self, other, downsample=False): """ if not isinstance(other, MinHash): raise TypeError("Must be a MinHash!") - return self._methodcall(lib.kmerminhash_count_common, other._get_objptr(), downsample) + return self._methodcall( + lib.kmerminhash_count_common, other._get_objptr(), downsample + ) def intersection_and_union_size(self, other): "Calculate intersection and union sizes between `self` and `other`." @@ -614,8 +653,9 @@ def intersection_and_union_size(self, other): raise TypeError("incompatible MinHash objects") usize = ffi.new("uint64_t *") - common = self._methodcall(lib.kmerminhash_intersection_union_size, - other._get_objptr(), usize) + common = self._methodcall( + lib.kmerminhash_intersection_union_size, other._get_objptr(), usize + ) usize = ffi.unpack(usize, 1)[0] return common, usize @@ -628,11 +668,11 @@ def downsample(self, *, num=None, scaled=None): # at least one must be specified! if num is None and scaled is None: - raise ValueError('must specify either num or scaled to downsample') + raise ValueError("must specify either num or scaled to downsample") # both cannot be specified if num is not None and scaled is not None: - raise ValueError('cannot specify both num and scaled') + raise ValueError("cannot specify both num and scaled") if num is not None: # cannot downsample a scaled MinHash with num: @@ -644,13 +684,15 @@ def downsample(self, *, num=None, scaled=None): # acceptable num value? make sure to set max_hash to 0. max_hash = 0 - + elif scaled is not None: # cannot downsample a num MinHash with scaled if self.num: raise ValueError("cannot downsample a num MinHash using scaled") if self.scaled > scaled: - raise ValueError(f"new scaled {scaled} is lower than current sample scaled {self.scaled}") + raise ValueError( + f"new scaled {scaled} is lower than current sample scaled {self.scaled}" + ) # acceptable scaled value? reconfigure max_hash, keep num 0. max_hash = _get_max_hash_for_scaled(scaled) @@ -658,10 +700,14 @@ def downsample(self, *, num=None, scaled=None): # end checks! 
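A hedged sketch of the `downsample()` rules enforced in the checks above (assumes sourmash is installed): `num` and `scaled` are mutually exclusive, and a scaled sketch can only be downsampled to a coarser scaled value.

```python
import sourmash

mh = sourmash.MinHash(n=0, ksize=31, scaled=1000)
coarser = mh.downsample(scaled=2000)           # ok: 2000 is coarser than 1000

for bad_kwargs in (dict(scaled=500),           # finer than the original
                   dict(num=500),              # num on a scaled sketch
                   dict(num=500, scaled=2000)):  # both at once
    try:
        mh.downsample(**bad_kwargs)
    except ValueError as e:
        print(e)
```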
create new object: a = MinHash( - num, self.ksize, - is_protein=self.is_protein, dayhoff=self.dayhoff, hp=self.hp, - track_abundance=self.track_abundance, seed=self.seed, - max_hash=max_hash + num, + self.ksize, + is_protein=self.is_protein, + dayhoff=self.dayhoff, + hp=self.hp, + track_abundance=self.track_abundance, + seed=self.seed, + max_hash=max_hash, ) # copy over hashes: if self.track_abundance: @@ -676,9 +722,14 @@ def flatten(self): if self.track_abundance: # create new object: a = MinHash( - self.num, self.ksize, - is_protein=self.is_protein, dayhoff=self.dayhoff, hp=self.hp, - track_abundance=False, seed=self.seed, max_hash=self._max_hash + self.num, + self.ksize, + is_protein=self.is_protein, + dayhoff=self.dayhoff, + hp=self.hp, + track_abundance=False, + seed=self.seed, + max_hash=self._max_hash, ) a.add_many(self) @@ -688,11 +739,21 @@ def flatten(self): def jaccard(self, other, downsample=False): "Calculate Jaccard similarity of two MinHash objects." if self.num != other.num: - err = "must have same num: {} != {}".format(self.num, other.num) + err = f"must have same num: {self.num} != {other.num}" raise TypeError(err) - return self._methodcall(lib.kmerminhash_similarity, other._get_objptr(), True, downsample) + return self._methodcall( + lib.kmerminhash_similarity, other._get_objptr(), True, downsample + ) - def jaccard_ani(self, other, *, downsample=False, jaccard=None, prob_threshold=1e-3, err_threshold=1e-4): + def jaccard_ani( + self, + other, + *, + downsample=False, + jaccard=None, + prob_threshold=1e-3, + err_threshold=1e-4, + ): "Use jaccard to estimate ANI between two MinHash objects." if not (self.scaled and other.scaled): raise TypeError("Error: can only calculate ANI for scaled MinHashes") @@ -705,12 +766,18 @@ def jaccard_ani(self, other, *, downsample=False, jaccard=None, prob_threshold=1 other_mh = other.downsample(scaled=scaled) if jaccard is None: jaccard = self_mh.similarity(other_mh, ignore_abundance=True) - avg_sketch_kmers = (len(self_mh) + len(other_mh))/2 - avg_n_kmers = round(avg_sketch_kmers * scaled) # would be better if hll estimate - see #1798 - j_aniresult = jaccard_to_distance(jaccard, self_mh.ksize, scaled, - n_unique_kmers=avg_n_kmers, - prob_threshold = prob_threshold, - err_threshold = err_threshold) + avg_sketch_kmers = (len(self_mh) + len(other_mh)) / 2 + avg_n_kmers = round( + avg_sketch_kmers * scaled + ) # would be better if hll estimate - see #1798 + j_aniresult = jaccard_to_distance( + jaccard, + self_mh.ksize, + scaled, + n_unique_kmers=avg_n_kmers, + prob_threshold=prob_threshold, + err_threshold=err_threshold, + ) # null out ANI if either mh size estimation is inaccurate if not self.size_is_accurate() or not other.size_is_accurate(): j_aniresult.size_is_inaccurate = True @@ -730,16 +797,20 @@ def similarity(self, other, ignore_abundance=False, downsample=False): See https://en.wikipedia.org/wiki/Cosine_similarity """ - return self._methodcall(lib.kmerminhash_similarity, - other._get_objptr(), - ignore_abundance, downsample) + return self._methodcall( + lib.kmerminhash_similarity, + other._get_objptr(), + ignore_abundance, + downsample, + ) def angular_similarity(self, other): "Calculate the angular similarity." 
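The `avg_n_kmers` estimate fed into `jaccard_to_distance` above can be read standalone: with a scaled=S sketch, each retained hash stands in for roughly S distinct k-mers, so sketch size times S approximates the dataset's unique k-mer count (an HLL estimate would be better, per the #1798 comment). A self-contained arithmetic sketch:

```python
# Mirror of the jaccard_ani() k-mer estimate, with made-up sketch sizes.
scaled = 1000
len_a, len_b = 5000, 7000              # hashes in each downsampled sketch
avg_sketch_kmers = (len_a + len_b) / 2
avg_n_kmers = round(avg_sketch_kmers * scaled)
assert avg_n_kmers == 6_000_000
```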
if not (self.track_abundance and other.track_abundance): - raise TypeError("Error: Angular (cosine) similarity requires both sketches to track hash abundance.") - return self._methodcall(lib.kmerminhash_angular_similarity, - other._get_objptr()) + raise TypeError( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." + ) + return self._methodcall(lib.kmerminhash_angular_similarity, other._get_objptr()) def is_compatible(self, other): return self._methodcall(lib.kmerminhash_is_compatible, other._get_objptr()) @@ -749,12 +820,16 @@ def contained_by(self, other, downsample=False): Calculate how much of self is contained by other. """ if not (self.scaled and other.scaled): - raise TypeError("Error: can only calculate containment for scaled MinHashes") + raise TypeError( + "Error: can only calculate containment for scaled MinHashes" + ) denom = len(self) if not denom: return 0.0 - total_denom = float(denom * self.scaled) # would be better if hll estimate - see #1798 - bias_factor = 1.0 - (1.0 - 1.0/self.scaled) ** total_denom + total_denom = float( + denom * self.scaled + ) # would be better if hll estimate - see #1798 + bias_factor = 1.0 - (1.0 - 1.0 / self.scaled) ** total_denom containment = self.count_common(other, downsample) / (denom * bias_factor) # debiasing containment can lead to vals outside of 0-1 range. constrain. if containment >= 1: @@ -764,8 +839,16 @@ def contained_by(self, other, downsample=False): else: return containment - - def containment_ani(self, other, *, downsample=False, containment=None, confidence=0.95, estimate_ci = False, prob_threshold=1e-3): + def containment_ani( + self, + other, + *, + downsample=False, + containment=None, + confidence=0.95, + estimate_ci=False, + prob_threshold=1e-3, + ): "Use self contained by other to estimate ANI between two MinHash objects." if not (self.scaled and other.scaled): raise TypeError("Error: can only calculate ANI for scaled MinHashes") @@ -778,11 +861,17 @@ def containment_ani(self, other, *, downsample=False, containment=None, confiden other_mh = other.downsample(scaled=scaled) if containment is None: containment = self_mh.contained_by(other_mh) - n_kmers = len(self_mh) * scaled # would be better if hll estimate - see #1798 - - c_aniresult = containment_to_distance(containment, self_mh.ksize, self_mh.scaled, - n_unique_kmers=n_kmers, confidence=confidence, - estimate_ci = estimate_ci, prob_threshold=prob_threshold) + n_kmers = len(self_mh) * scaled # would be better if hll estimate - see #1798 + + c_aniresult = containment_to_distance( + containment, + self_mh.ksize, + self_mh.scaled, + n_unique_kmers=n_kmers, + confidence=confidence, + estimate_ci=estimate_ci, + prob_threshold=prob_threshold, + ) # null out ANI if either mh size estimation is inaccurate if not self.size_is_accurate() or not other.size_is_accurate(): c_aniresult.size_is_inaccurate = True @@ -793,13 +882,19 @@ def max_containment(self, other, downsample=False): Calculate maximum containment. 
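The debiasing in `contained_by` above is worth unpacking: on one reading, `1 - (1 - 1/scaled)**(len * scaled)` is the probability that a set of `len * scaled` k-mers contributes at least one hash under the scaled threshold, and dividing the raw containment by it corrects for small denominators. A self-contained numeric sketch with made-up sizes:

```python
# Mirror of the contained_by() debiasing, including the final clamp to 1.0.
scaled = 1000
denom = 50                                  # hashes in the query sketch
total_kmers = denom * scaled
bias_factor = 1.0 - (1.0 - 1.0 / scaled) ** total_kmers
raw = 30 / denom                            # count_common / denom
containment = min(raw / bias_factor, 1.0)   # debiasing can exceed 1; clamp
print(bias_factor, containment)
```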
""" if not (self.scaled and other.scaled): - raise TypeError("Error: can only calculate containment for scaled MinHashes") + raise TypeError( + "Error: can only calculate containment for scaled MinHashes" + ) min_denom = min((len(self), len(other))) if not min_denom: return 0.0 - total_denom = float(min_denom * self.scaled) # would be better if hll estimate - see #1798 - bias_factor = 1.0 - (1.0 - 1.0/self.scaled) ** total_denom - max_containment = self.count_common(other, downsample) / (min_denom * bias_factor) + total_denom = float( + min_denom * self.scaled + ) # would be better if hll estimate - see #1798 + bias_factor = 1.0 - (1.0 - 1.0 / self.scaled) ** total_denom + max_containment = self.count_common(other, downsample) / ( + min_denom * bias_factor + ) # debiasing containment can lead to vals outside of 0-1 range. constrain. if max_containment >= 1: return 1.0 @@ -808,7 +903,16 @@ def max_containment(self, other, downsample=False): else: return max_containment - def max_containment_ani(self, other, *, downsample=False, max_containment=None, confidence=0.95, estimate_ci=False, prob_threshold=1e-3): + def max_containment_ani( + self, + other, + *, + downsample=False, + max_containment=None, + confidence=0.95, + estimate_ci=False, + prob_threshold=1e-3, + ): "Use max_containment to estimate ANI between two MinHash objects." if not (self.scaled and other.scaled): raise TypeError("Error: can only calculate ANI for scaled MinHashes") @@ -824,9 +928,15 @@ def max_containment_ani(self, other, *, downsample=False, max_containment=None, min_n_kmers = min(len(self_mh), len(other_mh)) n_kmers = min_n_kmers * scaled # would be better if hll estimate - see #1798 - c_aniresult = containment_to_distance(max_containment, self_mh.ksize, scaled, - n_unique_kmers=n_kmers,confidence=confidence, - estimate_ci = estimate_ci, prob_threshold=prob_threshold) + c_aniresult = containment_to_distance( + max_containment, + self_mh.ksize, + scaled, + n_unique_kmers=n_kmers, + confidence=confidence, + estimate_ci=estimate_ci, + prob_threshold=prob_threshold, + ) # null out ANI if either mh size estimation is inaccurate if not self.size_is_accurate() or not other.size_is_accurate(): c_aniresult.size_is_inaccurate = True @@ -838,12 +948,14 @@ def avg_containment(self, other, *, downsample=False): Note: this is average of the containments, *not* count_common/ avg_denom """ if not (self.scaled and other.scaled): - raise TypeError("Error: can only calculate containment for scaled MinHashes") + raise TypeError( + "Error: can only calculate containment for scaled MinHashes" + ) c1 = self.contained_by(other, downsample) c2 = other.contained_by(self, downsample) - return (c1 + c2)/2 + return (c1 + c2) / 2 def avg_containment_ani(self, other, *, downsample=False, prob_threshold=1e-3): """ @@ -852,11 +964,15 @@ def avg_containment_ani(self, other, *, downsample=False, prob_threshold=1e-3): """ if not (self.scaled and other.scaled): raise TypeError("Error: can only calculate ANI for scaled MinHashes") - a1 = self.containment_ani(other, downsample=downsample, prob_threshold=prob_threshold).ani - a2 = other.containment_ani(self, downsample=downsample, prob_threshold=prob_threshold).ani + a1 = self.containment_ani( + other, downsample=downsample, prob_threshold=prob_threshold + ).ani + a2 = other.containment_ani( + self, downsample=downsample, prob_threshold=prob_threshold + ).ani if any([a1 is None, a2 is None]): return None - return (a1 + a2)/2 + return (a1 + a2) / 2 def __add__(self, other): if not isinstance(other, 
MinHash): @@ -864,11 +980,14 @@ def __add__(self, other): if self.num and other.num: if self.num != other.num: - raise TypeError(f"incompatible num values: self={self.num} other={other.num}") + raise TypeError( + f"incompatible num values: self={self.num} other={other.num}" + ) new_obj = self.to_mutable() new_obj += other return new_obj + __or__ = __add__ def __iadd__(self, other): @@ -890,6 +1009,7 @@ def intersection(self, other): ptr = self._methodcall(lib.kmerminhash_intersection, other._get_objptr()) return MinHash._from_objptr(ptr) + __and__ = intersection def set_abundances(self, values, clear=True): @@ -904,12 +1024,14 @@ def set_abundances(self, values, clear=True): abunds = [] for h, v in values.items(): - hashes.append(h) + hashes.append(h) if v < 0: raise ValueError("Abundance cannot be set to a negative value.") abunds.append(v) - self._methodcall(lib.kmerminhash_set_abundances, hashes, abunds, len(hashes), clear) + self._methodcall( + lib.kmerminhash_set_abundances, hashes, abunds, len(hashes), clear + ) else: raise RuntimeError( "Use track_abundance=True when constructing " @@ -921,15 +1043,15 @@ def add_protein(self, sequence): self._methodcall(lib.kmerminhash_add_protein, to_bytes(sequence)) @property - def moltype(self): # TODO: test in minhash tests + def moltype(self): # TODO: test in minhash tests if self.is_protein: - return 'protein' + return "protein" elif self.dayhoff: - return 'dayhoff' + return "dayhoff" elif self.hp: - return 'hp' + return "hp" else: - return 'DNA' + return "DNA" def to_mutable(self): "Return a copy of this MinHash that can be changed." @@ -954,7 +1076,7 @@ def inflate(self, from_mh): """ if not self.track_abundance and from_mh.track_abundance: orig_abunds = from_mh.hashes - abunds = { h: orig_abunds.get(h, 0) for h in self.hashes } + abunds = {h: orig_abunds.get(h, 0) for h in self.hashes} abund_mh = from_mh.copy_and_clear() @@ -963,7 +1085,9 @@ def inflate(self, from_mh): return abund_mh else: - raise ValueError("inflate operates on a flat MinHash and takes a MinHash object with track_abundance=True") + raise ValueError( + "inflate operates on a flat MinHash and takes a MinHash object with track_abundance=True" + ) @property def sum_abundances(self): @@ -995,9 +1119,11 @@ def unique_dataset_hashes(self): Approximate total number of hashes (num_hashes *scaled). """ if not self.scaled: - raise TypeError("can only approximate unique_dataset_hashes for scaled MinHashes") + raise TypeError( + "can only approximate unique_dataset_hashes for scaled MinHashes" + ) # TODO: replace set_size with HLL estimate when that gets implemented - return len(self) * self.scaled # + (self.ksize - 1) for bp estimation + return len(self) * self.scaled # + (self.ksize - 1) for bp estimation def size_is_accurate(self, relative_error=0.20, confidence=0.95): """ @@ -1008,41 +1134,47 @@ def size_is_accurate(self, relative_error=0.20, confidence=0.95): Returns True if probability is greater than or equal to the desired confidence. 
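A hedged usage sketch of the operator aliases wired up above, `__or__ = __add__` (merge) and `__and__ = intersection`; assumes sourmash is installed.

```python
import sourmash

a = sourmash.MinHash(n=0, ksize=21, scaled=1)
b = sourmash.MinHash(n=0, ksize=21, scaled=1)
a.add_sequence("ATGCGATGCATGCGATGCATGCGAT")   # 25 bp -> 5 k-mers
b.add_sequence("ATGCGATGCATGCGATGCATG")       # 21 bp -> 1 k-mer

union = a | b        # alias for a + b (merge into a mutable copy)
common = a & b       # alias for a.intersection(b)
assert len(common) <= min(len(a), len(b)) <= len(union)
```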
""" if not self.scaled: - raise TypeError("Error: can only estimate dataset size for scaled MinHashes") + raise TypeError( + "Error: can only estimate dataset size for scaled MinHashes" + ) if any([not (0 <= relative_error <= 1), not (0 <= confidence <= 1)]): - raise ValueError("Error: relative error and confidence values must be between 0 and 1.") - # to do: replace unique_dataset_hashes with HLL estimation when it gets implemented - probability = set_size_exact_prob(self.unique_dataset_hashes, self.scaled, relative_error=relative_error) + raise ValueError( + "Error: relative error and confidence values must be between 0 and 1." + ) + # to do: replace unique_dataset_hashes with HLL estimation when it gets implemented + probability = set_size_exact_prob( + self.unique_dataset_hashes, self.scaled, relative_error=relative_error + ) return probability >= confidence class FrozenMinHash(MinHash): def add_sequence(self, *args, **kwargs): - raise TypeError('FrozenMinHash does not support modification') + raise TypeError("FrozenMinHash does not support modification") def add_kmer(self, *args, **kwargs): - raise TypeError('FrozenMinHash does not support modification') + raise TypeError("FrozenMinHash does not support modification") def add_many(self, *args, **kwargs): - raise TypeError('FrozenMinHash does not support modification') + raise TypeError("FrozenMinHash does not support modification") def remove_many(self, *args, **kwargs): - raise TypeError('FrozenMinHash does not support modification') + raise TypeError("FrozenMinHash does not support modification") def add_hash(self, *args, **kwargs): - raise TypeError('FrozenMinHash does not support modification') + raise TypeError("FrozenMinHash does not support modification") def add_hash_with_abundance(self, *args, **kwargs): - raise TypeError('FrozenMinHash does not support modification') + raise TypeError("FrozenMinHash does not support modification") def clear(self, *args, **kwargs): - raise TypeError('FrozenMinHash does not support modification') + raise TypeError("FrozenMinHash does not support modification") def set_abundances(self, *args, **kwargs): - raise TypeError('FrozenMinHash does not support modification') + raise TypeError("FrozenMinHash does not support modification") def add_protein(self, *args, **kwargs): - raise TypeError('FrozenMinHash does not support modification') + raise TypeError("FrozenMinHash does not support modification") def downsample(self, *, num=None, scaled=None): if scaled and self.scaled == scaled: @@ -1062,10 +1194,10 @@ def flatten(self): return flat_mh def __iadd__(self, *args, **kwargs): - raise TypeError('FrozenMinHash does not support modification') + raise TypeError("FrozenMinHash does not support modification") def merge(self, *args, **kwargs): - raise TypeError('FrozenMinHash does not support modification') + raise TypeError("FrozenMinHash does not support modification") def to_mutable(self): "Return a copy of this MinHash that can be changed." 
@@ -1085,16 +1217,29 @@ def into_frozen(self): def __setstate__(self, tup): "support pickling via __getstate__/__setstate__" - (n, ksize, is_protein, dayhoff, hp, mins, _, track_abundance, - max_hash, seed) = tup + ( + n, + ksize, + is_protein, + dayhoff, + hp, + mins, + _, + track_abundance, + max_hash, + seed, + ) = tup self.__del__() hash_function = ( - lib.HASH_FUNCTIONS_MURMUR64_DAYHOFF if dayhoff else - lib.HASH_FUNCTIONS_MURMUR64_HP if hp else - lib.HASH_FUNCTIONS_MURMUR64_PROTEIN if is_protein else - lib.HASH_FUNCTIONS_MURMUR64_DNA + lib.HASH_FUNCTIONS_MURMUR64_DAYHOFF + if dayhoff + else lib.HASH_FUNCTIONS_MURMUR64_HP + if hp + else lib.HASH_FUNCTIONS_MURMUR64_PROTEIN + if is_protein + else lib.HASH_FUNCTIONS_MURMUR64_DNA ) scaled = _get_scaled_for_max_hash(max_hash) @@ -1108,4 +1253,5 @@ def __setstate__(self, tup): def __copy__(self): return self + copy = __copy__ diff --git a/src/sourmash/nodegraph.py b/src/sourmash/nodegraph.py index 8faa2eb874..3204e11b7e 100644 --- a/src/sourmash/nodegraph.py +++ b/src/sourmash/nodegraph.py @@ -88,6 +88,7 @@ def matches(self, mh): def to_khmer_nodegraph(self): import khmer + try: load_nodegraph = khmer.load_nodegraph except AttributeError: @@ -117,41 +118,44 @@ def extract_nodegraph_info(filename): ht_type = None occupied = None - uint_size = len(pack('I', 0)) - uchar_size = len(pack('B', 0)) - ulonglong_size = len(pack('Q', 0)) + uint_size = len(pack("I", 0)) + uchar_size = len(pack("B", 0)) + ulonglong_size = len(pack("Q", 0)) try: - with open(filename, 'rb') as nodegraph: - signature, = unpack('4s', nodegraph.read(4)) - version, = unpack('B', nodegraph.read(1)) - ht_type, = unpack('B', nodegraph.read(1)) - ksize, = unpack('I', nodegraph.read(uint_size)) - n_tables, = unpack('B', nodegraph.read(uchar_size)) - occupied, = unpack('Q', nodegraph.read(ulonglong_size)) - table_size, = unpack('Q', nodegraph.read(ulonglong_size)) + with open(filename, "rb") as nodegraph: + (signature,) = unpack("4s", nodegraph.read(4)) + (version,) = unpack("B", nodegraph.read(1)) + (ht_type,) = unpack("B", nodegraph.read(1)) + (ksize,) = unpack("I", nodegraph.read(uint_size)) + (n_tables,) = unpack("B", nodegraph.read(uchar_size)) + (occupied,) = unpack("Q", nodegraph.read(ulonglong_size)) + (table_size,) = unpack("Q", nodegraph.read(ulonglong_size)) if signature != b"OXLI": - raise ValueError("Node graph '{}' is missing file type " - "signature".format(filename) + str(signature)) + raise ValueError( + f"Node graph '{filename}' is missing file type " + "signature" + str(signature) + ) except: - raise ValueError("Node graph '{}' is corrupt ".format(filename)) + raise ValueError(f"Node graph '{filename}' is corrupt ") return ksize, round(table_size, -2), n_tables, version, ht_type, occupied -def calc_expected_collisions(graph, force=False, max_false_pos=.2): +def calc_expected_collisions(graph, force=False, max_false_pos=0.2): fp_all = graph.expected_collisions if fp_all > max_false_pos: print("**", file=sys.stderr) - print("** ERROR: the graph structure is too small for ", - file=sys.stderr) - print("** this data set. Increase data structure size.", - file=sys.stderr) + print("** ERROR: the graph structure is too small for ", file=sys.stderr) + print("** this data set. 
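The nodegraph header parsing below uses the one-element-tuple unpack idiom that ruff reformats from `x, = ...` to `(x,) = ...`; a standalone sketch of why the comma is needed at all:

```python
# struct.unpack always returns a tuple, even for a single field, so the
# target must be a one-element tuple to receive the scalar.
from struct import pack, unpack

buf = pack("I", 31)
(ksize,) = unpack("I", buf)   # the parenthesized form used in this diff
assert ksize == 31
```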
Increase data structure size.", file=sys.stderr) print("** Do not use these results!!", file=sys.stderr) print("**", file=sys.stderr) - print("** (estimated false positive rate of %.3f;" % fp_all, - file=sys.stderr, end=' ') + print( + "** (estimated false positive rate of %.3f;" % fp_all, + file=sys.stderr, + end=" ", + ) print("max recommended %.3f)" % max_false_pos, file=sys.stderr) print("**", file=sys.stderr) diff --git a/src/sourmash/np_utils.py b/src/sourmash/np_utils.py index 683f0be6f6..5c69a0bd5d 100644 --- a/src/sourmash/np_utils.py +++ b/src/sourmash/np_utils.py @@ -12,9 +12,11 @@ def to_memmap(array): """ import numpy as np - filename = tempfile.NamedTemporaryFile(prefix="array", suffix=".mmap", delete=False).name + filename = tempfile.NamedTemporaryFile( + prefix="array", suffix=".mmap", delete=False + ).name shape = array.shape - f = np.memmap(filename, mode='w+', shape=shape, dtype=array.dtype) + f = np.memmap(filename, mode="w+", shape=shape, dtype=array.dtype) f[:] = array[:] del f large_memmap = np.memmap(filename, dtype=array.dtype, shape=shape) diff --git a/src/sourmash/picklist.py b/src/sourmash/picklist.py index 8f43aca739..8a5652eb1a 100644 --- a/src/sourmash/picklist.py +++ b/src/sourmash/picklist.py @@ -17,29 +17,32 @@ preprocess = {} # exact matches -preprocess['name'] = lambda x: x -preprocess['md5'] = lambda x: x +preprocess["name"] = lambda x: x +preprocess["md5"] = lambda x: x # identifier matches/prefix foo - space delimited identifiers -preprocess['identprefix'] = lambda x: x.split(' ')[0].split('.')[0] -preprocess['ident'] = lambda x: x.split(' ')[0] +preprocess["identprefix"] = lambda x: x.split(" ")[0].split(".")[0] +preprocess["ident"] = lambda x: x.split(" ")[0] # match 8 characters -preprocess['md5prefix8'] = lambda x: x[:8] -preprocess['md5short'] = lambda x: x[:8] +preprocess["md5prefix8"] = lambda x: x[:8] +preprocess["md5short"] = lambda x: x[:8] + # all meta-coltypes use the same preprocessing of tuple => (ident, md5short) def combine_ident_md5(x): "preprocess (name, md5) tup into (ident, md5short) tup" name, md5 = x - ident = name.split(' ')[0] + ident = name.split(" ")[0] md5 = md5[:8] return (ident, md5) -preprocess['manifest'] = combine_ident_md5 -preprocess['prefetch'] = combine_ident_md5 -preprocess['gather'] = combine_ident_md5 -preprocess['search'] = combine_ident_md5 + + +preprocess["manifest"] = combine_ident_md5 +preprocess["prefetch"] = combine_ident_md5 +preprocess["gather"] = combine_ident_md5 +preprocess["search"] = combine_ident_md5 class PickStyle(Enum): @@ -74,12 +77,20 @@ class SignaturePicklist: blank in this case: e.g. use 'pickfile.csv::gather'. These "meta-coltypes" use composite selection on (ident, md5short) tuples. """ - meta_coltypes = ('manifest', 'gather', 'prefetch', 'search') - supported_coltypes = ('md5', 'md5prefix8', 'md5short', - 'name', 'ident', 'identprefix') - def __init__(self, coltype, *, pickfile=None, column_name=None, - pickstyle=PickStyle.INCLUDE): + meta_coltypes = ("manifest", "gather", "prefetch", "search") + supported_coltypes = ( + "md5", + "md5prefix8", + "md5short", + "name", + "ident", + "identprefix", + ) + + def __init__( + self, coltype, *, pickfile=None, column_name=None, pickstyle=PickStyle.INCLUDE + ): "create a picklist of column type 'coltype'." # first, check coltype... 
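The picklist `preprocess` table above maps column types to tiny normalizers; a standalone check of the `ident`/`identprefix` rules (first space-delimited token, with `identprefix` also dropping the version suffix after the first `.`):

```python
# Same transformations as preprocess['ident'] / preprocess['identprefix'].
name = "GCF_000005845.2 Escherichia coli str. K-12"
ident = name.split(" ")[0]                      # 'GCF_000005845.2'
identprefix = name.split(" ")[0].split(".")[0]  # 'GCF_000005845'
assert (ident, identprefix) == ("GCF_000005845.2", "GCF_000005845")
```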
@@ -96,10 +107,10 @@ def __init__(self, coltype, *, pickfile=None, column_name=None, if column_name: raise ValueError(f"no column name allowed for coltype '{coltype}'") - if coltype == 'prefetch': - column_name = '(match_name, match_md5)' + if coltype == "prefetch": + column_name = "(match_name, match_md5)" else: - column_name = '(name, md5)' + column_name = "(name, md5)" self.coltype = coltype self.pickfile = pickfile @@ -114,18 +125,20 @@ def __init__(self, coltype, *, pickfile=None, column_name=None, @classmethod def from_picklist_args(cls, argstr): "load a picklist from an argument string 'pickfile:col:coltype:style'" - picklist = argstr.split(':') + picklist = argstr.split(":") pickstyle = PickStyle.INCLUDE # pickstyle specified? if len(picklist) == 4: pickstyle_str = picklist.pop() - if pickstyle_str == 'include': + if pickstyle_str == "include": pickstyle = PickStyle.INCLUDE - elif pickstyle_str == 'exclude': + elif pickstyle_str == "exclude": pickstyle = PickStyle.EXCLUDE else: - raise ValueError(f"invalid picklist 'pickstyle' argument 4: '{pickstyle_str}' must be 'include' or 'exclude'") + raise ValueError( + f"invalid picklist 'pickstyle' argument 4: '{pickstyle_str}' must be 'include' or 'exclude'" + ) if len(picklist) != 3: raise ValueError(f"invalid picklist argument '{argstr}'") @@ -133,36 +146,39 @@ def from_picklist_args(cls, argstr): assert len(picklist) == 3 pickfile, column, coltype = picklist - return cls(coltype, pickfile=pickfile, column_name=column, - pickstyle=pickstyle) + return cls(coltype, pickfile=pickfile, column_name=column, pickstyle=pickstyle) def _get_sig_attribute(self, ss): "for a given SourmashSignature, return relevant picklist value." coltype = self.coltype - if coltype in self.meta_coltypes: # gather, prefetch, search, manifest + if coltype in self.meta_coltypes: # gather, prefetch, search, manifest q = (ss.name, ss.md5sum()) - elif coltype in ('md5', 'md5prefix8', 'md5short'): + elif coltype in ("md5", "md5prefix8", "md5short"): q = ss.md5sum() - elif coltype in ('name', 'ident', 'identprefix'): + elif coltype in ("name", "ident", "identprefix"): q = ss.name else: - raise ValueError(f"picklist get_sig_attribute {coltype} has unhandled branch") + raise ValueError( + f"picklist get_sig_attribute {coltype} has unhandled branch" + ) return q def _get_value_for_manifest_row(self, row): "return the picklist value from a manifest row" - if self.coltype in self.meta_coltypes: # gather, prefetch, search, manifest - q = (row['name'], row['md5']) + if self.coltype in self.meta_coltypes: # gather, prefetch, search, manifest + q = (row["name"], row["md5"]) else: - if self.coltype == 'md5': - colkey = 'md5' - elif self.coltype in ('md5prefix8', 'md5short'): - colkey = 'md5short' - elif self.coltype in ('name', 'ident', 'identprefix'): - colkey = 'name' + if self.coltype == "md5": + colkey = "md5" + elif self.coltype in ("md5prefix8", "md5short"): + colkey = "md5short" + elif self.coltype in ("name", "ident", "identprefix"): + colkey = "name" else: - raise ValueError(f"picklist get_value_for_row {colkey} has unhandled branch") + raise ValueError( + f"picklist get_value_for_row {colkey} has unhandled branch" + ) q = row.get(colkey) @@ -175,12 +191,12 @@ def _get_value_for_csv_row(self, row): "return the picklist value from a CSV pickfile row - supplied by user, typically" # customize for each type of meta_coltypes - if self.coltype == 'manifest': - q = (row['name'], row['md5']) - elif self.coltype == 'prefetch': - q = (row['match_name'], row['match_md5']) - elif 
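The `from_picklist_args` classmethod above parses `'pickfile:col:coltype[:style]'` strings; a standalone sketch of that parse, with the optional fourth field defaulting to include:

```python
# Mirror of the argument-string parsing in from_picklist_args.
argstr = "list.csv:md5:md5short:exclude"
parts = argstr.split(":")

style = "include"
if len(parts) == 4:
    style = parts.pop()      # must be 'include' or 'exclude'

pickfile, column, coltype = parts
assert (pickfile, column, coltype, style) == \
    ("list.csv", "md5", "md5short", "exclude")
```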
self.coltype in ('gather', 'search'): - q = (row['name'], row['md5']) + if self.coltype == "manifest": + q = (row["name"], row["md5"]) + elif self.coltype == "prefetch": + q = (row["match_name"], row["match_md5"]) + elif self.coltype in ("gather", "search"): + q = (row["name"], row["md5"]) else: q = row[self.column_name] @@ -218,7 +234,9 @@ def load(self, *, allow_empty=False): self.pickfile = pickfile if not r.fieldnames: if not allow_empty: - raise ValueError(f"empty or improperly formatted pickfile '{pickfile}'") + raise ValueError( + f"empty or improperly formatted pickfile '{pickfile}'" + ) else: return 0, 0 diff --git a/src/sourmash/plugins.py b/src/sourmash/plugins.py index 4c18f27533..0871154f2d 100644 --- a/src/sourmash/plugins.py +++ b/src/sourmash/plugins.py @@ -18,7 +18,7 @@ import itertools import argparse -from .logging import (debug_literal, error, notify, set_quiet) +from .logging import debug_literal, error, notify, set_quiet # cover for older versions of Python that don't support selection on load # (the 'group=' below). @@ -26,20 +26,22 @@ # load 'load_from' entry points. NOTE: this executes on import of this module. try: - _plugin_load_from = entry_points(group='sourmash.load_from') + _plugin_load_from = entry_points(group="sourmash.load_from") except TypeError: from importlib_metadata import entry_points - _plugin_load_from = entry_points(group='sourmash.load_from') + + _plugin_load_from = entry_points(group="sourmash.load_from") # load 'save_to' entry points as well. -_plugin_save_to = entry_points(group='sourmash.save_to') +_plugin_save_to = entry_points(group="sourmash.save_to") # aaaaand CLI entry points: -_plugin_cli = entry_points(group='sourmash.cli_script') +_plugin_cli = entry_points(group="sourmash.cli_script") _plugin_cli_once = False ### + def get_load_from_functions(): "Load the 'load_from' plugins and yield tuples (priority, name, fn)." debug_literal(f"load_from plugins: {_plugin_load_from}") @@ -49,11 +51,13 @@ def get_load_from_functions(): try: loader_fn = plugin.load() except (ModuleNotFoundError, AttributeError) as e: - debug_literal(f"plugins.load_from_functions: got error loading {plugin.name}: {str(e)}") + debug_literal( + f"plugins.load_from_functions: got error loading {plugin.name}: {str(e)}" + ) continue # get 'priority' if it is available - priority = getattr(loader_fn, 'priority', DEFAULT_LOAD_FROM_PRIORITY) + priority = getattr(loader_fn, "priority", DEFAULT_LOAD_FROM_PRIORITY) # retrieve name (which is specified by plugin?) name = plugin.name @@ -70,11 +74,13 @@ def get_save_to_functions(): try: save_cls = plugin.load() except (ModuleNotFoundError, AttributeError) as e: - debug_literal(f"plugins.load_from_functions: got error loading {plugin.name}: {str(e)}") + debug_literal( + f"plugins.load_from_functions: got error loading {plugin.name}: {str(e)}" + ) continue # get 'priority' if it is available - priority = getattr(save_cls, 'priority', DEFAULT_SAVE_TO_PRIORITY) + priority = getattr(save_cls, "priority", DEFAULT_SAVE_TO_PRIORITY) # retrieve name (which is specified by plugin?) name = plugin.name @@ -88,17 +94,16 @@ class CommandLinePlugin: Subclasses should call super().__init__(parser) and super().main(args). 
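Given the contract in the `CommandLinePlugin` docstring above, here is a hedged sketch of a `cli_script` plugin; the `xyz_example` command name and its behavior are hypothetical, and registration would happen through a `sourmash.cli_script` entry point in the plugin's packaging metadata.

```python
from sourmash.plugins import CommandLinePlugin

class Command_Example(CommandLinePlugin):
    command = "xyz_example"            # run as: sourmash scripts xyz_example
    description = "print a greeting"   # first line shows in the listing

    def __init__(self, parser):
        super().__init__(parser)       # adds -q/--quiet and -d/--debug
        parser.add_argument("name")

    def main(self, args):
        super().main(args)             # applies quiet/debug settings
        print(f"hello, {args.name}!")
```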
""" + command = None description = None def __init__(self, parser): parser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' + "-q", "--quiet", action="store_true", help="suppress non-error output" ) parser.add_argument( - '-d', '--debug', action='store_true', - help='provide debugging output' + "-d", "--debug", action="store_true", help="provide debugging output" ) def main(self, args): @@ -116,14 +121,18 @@ def get_cli_script_plugins(): script_cls = plugin.load() except (ModuleNotFoundError, AttributeError): if _plugin_cli_once is False: - error(f"ERROR: cannot find or load module for cli_script plugin '{name}'") + error( + f"ERROR: cannot find or load module for cli_script plugin '{name}'" + ) continue - command = getattr(script_cls, 'command', None) + command = getattr(script_cls, "command", None) if command is None: # print error message only once... if _plugin_cli_once is False: - error(f"ERROR: no command provided by cli_script plugin '{name}' from {mod}; skipping") + error( + f"ERROR: no command provided by cli_script plugin '{name}' from {mod}; skipping" + ) else: x.append(plugin) @@ -137,8 +146,8 @@ def get_cli_scripts_descriptions(): name = plugin.name script_cls = plugin.load() - command = getattr(script_cls, 'command') - description = getattr(script_cls, 'description', "") + command = getattr(script_cls, "command") + description = getattr(script_cls, "description", "") if description: description = description.splitlines()[0] if not description: @@ -155,18 +164,21 @@ def add_cli_scripts(parser): name = plugin.name script_cls = plugin.load() - usage = getattr(script_cls, 'usage', None) - description = getattr(script_cls, 'description', None) - epilog = getattr(script_cls, 'epilog', None) - formatter_class = getattr(script_cls, 'formatter_class', - argparse.HelpFormatter) - - subparser = parser.add_parser(script_cls.command, - usage=usage, - description=description, - epilog=epilog, - formatter_class=formatter_class) - debug_literal(f"cls_script plugin '{name}' adding command '{script_cls.command}'") + usage = getattr(script_cls, "usage", None) + description = getattr(script_cls, "description", None) + epilog = getattr(script_cls, "epilog", None) + formatter_class = getattr(script_cls, "formatter_class", argparse.HelpFormatter) + + subparser = parser.add_parser( + script_cls.command, + usage=usage, + description=description, + epilog=epilog, + formatter_class=formatter_class, + ) + debug_literal( + f"cls_script plugin '{name}' adding command '{script_cls.command}'" + ) obj = script_cls(subparser) d[script_cls.command] = obj @@ -174,9 +186,7 @@ def add_cli_scripts(parser): def list_all_plugins(): - plugins = itertools.chain(_plugin_load_from, - _plugin_save_to, - _plugin_cli) + plugins = itertools.chain(_plugin_load_from, _plugin_save_to, _plugin_cli) plugins = list(plugins) if not plugins: @@ -185,7 +195,9 @@ def list_all_plugins(): notify("") notify("the following plugins are installed:") notify("") - notify(f"{'plugin type':<20s} {'from python module':<30s} {'v':<5s} {'entry point name':<20s}") + notify( + f"{'plugin type':<20s} {'from python module':<30s} {'v':<5s} {'entry point name':<20s}" + ) notify(f"{'-'*20} {'-'*30} {'-'*5} {'-'*20}") for plugin in plugins: diff --git a/src/sourmash/save_load.py b/src/sourmash/save_load.py index f7109f0fb1..1f73c116c7 100644 --- a/src/sourmash/save_load.py +++ b/src/sourmash/save_load.py @@ -43,7 +43,7 @@ from .sbtmh import load_sbt_index from .lca.lca_db import load_single_database from . 
import signature as sigmod -from .index import (LinearIndex, ZipFileLinearIndex, MultiIndex) +from .index import LinearIndex, ZipFileLinearIndex, MultiIndex from .manifest import CollectionManifest @@ -74,16 +74,18 @@ def SaveSignaturesToLocation(location): with SaveSignaturesToLocation(filename_or_location) as save_sigs: save_sigs.add(sig_obj) """ - save_list = itertools.chain(_save_classes, - sourmash_plugins.get_save_to_functions()) - for priority, cls in sorted(save_list, key=lambda x:x[0]): + save_list = itertools.chain(_save_classes, sourmash_plugins.get_save_to_functions()) + for priority, cls in sorted(save_list, key=lambda x: x[0]): debug_literal(f"trying to match save function {cls}, priority={priority}") if cls.matches(location): debug_literal(f"{cls} is a match!") return cls(location) - raise Exception(f"cannot determine how to open location {location} for saving; this should never happen!?") + raise Exception( + f"cannot determine how to open location {location} for saving; this should never happen!?" + ) + ### Implementation machinery for _load_databases @@ -101,18 +103,19 @@ def _load_database(filename, traverse_yield_all, *, cache_size=None): plugin_fns = sourmash_plugins.get_load_from_functions() # aggregate with default load_from functions & sort by priority - load_from_functions = sorted(itertools.chain(_loader_functions, - plugin_fns)) - + load_from_functions = sorted(itertools.chain(_loader_functions, plugin_fns)) + # iterate through loader functions, sorted by priority; try them all. # Catch ValueError & IndexNotLoaded but nothing else. - for (priority, desc, load_fn) in load_from_functions: + for priority, desc, load_fn in load_from_functions: db = None try: - debug_literal(f"_load_databases: trying loader fn - priority {priority} - '{desc}'") - db = load_fn(filename, - traverse_yield_all=traverse_yield_all, - cache_size=cache_size) + debug_literal( + f"_load_databases: trying loader fn - priority {priority} - '{desc}'" + ) + db = load_fn( + filename, traverse_yield_all=traverse_yield_all, cache_size=cache_size + ) except (ValueError, IndexNotLoaded): debug_literal(f"_load_databases: FAIL with ValueError: on fn {desc}.") debug_literal(traceback.format_exc()) @@ -126,16 +129,20 @@ def _load_database(filename, traverse_yield_all, *, cache_size=None): if loaded: assert db is not None return db - + raise ValueError(f"Error while reading signatures from '{filename}'.") _loader_functions = [] + + def add_loader(name, priority): "decorator to add name/priority to _loader_functions" + def dec_priority(func): _loader_functions.append((priority, name, func)) return func + return dec_priority @@ -143,10 +150,10 @@ def dec_priority(func): def _load_stdin(filename, **kwargs): "Load collection from .sig file streamed in via stdin" db = None - if filename == '-': + if filename == "-": # load as LinearIndex, then pass into MultiIndex to generate a # manifest. - lidx = LinearIndex.load(sys.stdin, filename='-') + lidx = LinearIndex.load(sys.stdin, filename="-") db = MultiIndex.load((lidx,), (None,), parent="-") return db @@ -175,7 +182,7 @@ def _multiindex_load_from_pathlist(filename, **kwargs): @add_loader("load from path (file or directory)", 40) def _multiindex_load_from_path(filename, **kwargs): "Load collection from a directory." 
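The `add_loader` decorator above builds a priority-sorted registry that `_load_database` walks until one loader succeeds; a standalone sketch of the same registry-plus-dispatch pattern:

```python
# Toy version of the add_loader registry: decorators record
# (priority, name, fn) tuples, dispatch tries them in priority order.
_loader_functions = []

def add_loader(name, priority):
    def dec_priority(func):
        _loader_functions.append((priority, name, func))
        return func
    return dec_priority

@add_loader("load from stdin", 10)
def _load_stdin(filename, **kwargs):
    return "db" if filename == "-" else None

for priority, desc, fn in sorted(_loader_functions):
    if fn("-") is not None:
        print(f"loaded via '{desc}' (priority {priority})")
        break
```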
- traverse_yield_all = kwargs['traverse_yield_all'] + traverse_yield_all = kwargs["traverse_yield_all"] db = MultiIndex.load_from_path(filename, traverse_yield_all) return db @@ -184,7 +191,7 @@ def _multiindex_load_from_path(filename, **kwargs): @add_loader("load SBT", 60) def _load_sbt(filename, **kwargs): "Load collection from an SBT." - cache_size = kwargs.get('cache_size') + cache_size = kwargs.get("cache_size") try: db = load_sbt_index(filename, cache_size=cache_size) @@ -210,11 +217,12 @@ def _load_sqlite_db(filename, **kwargs): def _load_zipfile(filename, **kwargs): "Load collection from a .zip file." db = None - if filename.endswith('.zip'): - traverse_yield_all = kwargs['traverse_yield_all'] + if filename.endswith(".zip"): + traverse_yield_all = kwargs["traverse_yield_all"] try: - db = ZipFileLinearIndex.load(filename, - traverse_yield_all=traverse_yield_all) + db = ZipFileLinearIndex.load( + filename, traverse_yield_all=traverse_yield_all + ) except FileNotFoundError as exc: # turn this into an IndexNotLoaded => proper exception handling by # _load_database. @@ -236,13 +244,17 @@ def _error_on_fastaq(filename, **kwargs): pass if success: - raise Exception(f"Error while reading signatures from '{filename}' - got sequences instead! Is this a FASTA/FASTQ file?") + raise Exception( + f"Error while reading signatures from '{filename}' - got sequences instead! Is this a FASTA/FASTQ file?" + ) ### Implementation machinery for SaveSignaturesToLocation + class Base_SaveSignaturesToLocation: "Base signature saving class. Track location (if any) and count." + def __init__(self, location): self.location = location self.count = 0 @@ -288,14 +300,14 @@ def _get_signatures_from_rust(siglist): # Rust supports multiple. For now, go through serializing # and deserializing the signature! See issue #1167 for more. json_str = sourmash.save_signatures(siglist) - for ss in sourmash.signature.load_signatures(json_str): - yield ss + yield from sourmash.signature.load_signatures(json_str) class SaveSignatures_NoOutput(Base_SaveSignaturesToLocation): "Do not save signatures." + def __repr__(self): - return 'SaveSignatures_NoOutput()' + return "SaveSignatures_NoOutput()" @classmethod def matches(cls, location): @@ -310,6 +322,7 @@ def close(self): class SaveSignatures_Directory(Base_SaveSignaturesToLocation): "Save signatures within a directory, using md5sum names." + def __init__(self, location): super().__init__(location) @@ -320,7 +333,7 @@ def __repr__(self): def matches(cls, location): "anything ending in /" if location: - return location.endswith('/') + return location.endswith("/") def close(self): pass @@ -354,6 +367,7 @@ def add(self, ss): class SaveSignatures_SqliteIndex(Base_SaveSignaturesToLocation): "Save signatures within a directory, using md5sum names." + def __init__(self, location): super().__init__(location) self.location = location @@ -364,14 +378,14 @@ def __init__(self, location): def matches(cls, location): "anything ending in .sqldb" if location: - return location.endswith('.sqldb') + return location.endswith(".sqldb") def __repr__(self): return f"SaveSignatures_SqliteIndex('{self.location}')" def close(self): self.idx.commit() - self.cursor.execute('VACUUM') + self.cursor.execute("VACUUM") self.idx.close() def open(self): @@ -390,11 +404,12 @@ def add(self, add_sig): class SaveSignatures_SigFile(Base_SaveSignaturesToLocation): "Save signatures to a .sig JSON file." 
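A hedged end-to-end sketch of the saver dispatch refactored above: `SaveSignaturesToLocation` matches the location's shape (`-`, `.sig`, `.sig.gz`, trailing `/`, `.zip`, `.sqldb`) to a saver class and is used as a context manager, per its docstring. Assumes sourmash is installed.

```python
import sourmash
from sourmash.save_load import SaveSignaturesToLocation

mh = sourmash.MinHash(n=0, ksize=21, scaled=1)
mh.add_sequence("ATGCGATGCATGCGATGCATG")
ss = sourmash.SourmashSignature(mh, name="example")

# '.zip' should dispatch to the zipfile saver via the matches() checks above.
with SaveSignaturesToLocation("example.zip") as save_sigs:
    save_sigs.add(ss)
```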
+ def __init__(self, location): super().__init__(location) self.keep = [] self.compress = 0 - if self.location.endswith('.gz'): + if self.location.endswith(".gz"): self.compress = 1 @classmethod @@ -409,12 +424,12 @@ def open(self): pass def close(self): - if self.location == '-': + if self.location == "-": sourmash.save_signatures(self.keep, sys.stdout) else: # text mode? encode in utf-8 mode = "w" - encoding = 'utf-8' + encoding = "utf-8" # compressed? bytes & binary. if self.compress: @@ -422,8 +437,7 @@ def close(self): mode = "wb" with open(self.location, mode, encoding=encoding) as fp: - sourmash.save_signatures(self.keep, fp, - compression=self.compress) + sourmash.save_signatures(self.keep, fp, compression=self.compress) def add(self, ss): super().add(ss) @@ -432,6 +446,7 @@ def add(self, ss): class SaveSignatures_ZipFile(Base_SaveSignaturesToLocation): "Save compressed signatures in an uncompressed Zip file." + def __init__(self, location): super().__init__(location) self.storage = None @@ -440,7 +455,7 @@ def __init__(self, location): def matches(cls, location): "anything ending in .zip" if location: - return location.endswith('.zip') + return location.endswith(".zip") def __repr__(self): return f"SaveSignatures_ZipFile('{self.location}')" @@ -454,8 +469,7 @@ def close(self): manifest.write_to_csv(manifest_fp, write_header=True) manifest_data = manifest_fp.getvalue().encode("utf-8") - self.storage.save(manifest_name, manifest_data, overwrite=True, - compress=True) + self.storage.save(manifest_name, manifest_data, overwrite=True, compress=True) self.storage.flush() self.storage.close() @@ -476,19 +490,21 @@ def open(self): raise ValueError(f"File '{self.location}' cannot be opened as a zip file.") if not storage.subdir: - storage.subdir = 'signatures' + storage.subdir = "signatures" # now, try to load manifest try: - manifest_data = storage.load('SOURMASH-MANIFEST.csv') + manifest_data = storage.load("SOURMASH-MANIFEST.csv") except (FileNotFoundError, KeyError): # if file already exists must have manifest... if not do_create: - raise ValueError(f"Cannot add to existing zipfile '{self.location}' without a manifest") + raise ValueError( + f"Cannot add to existing zipfile '{self.location}' without a manifest" + ) self.manifest_rows = [] else: # success! decode manifest_data, create manifest rows => append. 
- manifest_data = manifest_data.decode('utf-8') + manifest_data = manifest_data.decode("utf-8") manifest_fp = StringIO(manifest_data) manifest = CollectionManifest.load_from_csv(manifest_fp) self.manifest_rows = list(manifest._select()) @@ -511,12 +527,13 @@ def add(self, add_sig): md5 = ss.md5sum() storage = self.storage - path = f'{storage.subdir}/{md5}.sig.gz' + path = f"{storage.subdir}/{md5}.sig.gz" location = storage.save(path, buf) # update manifest - row = CollectionManifest.make_manifest_row(ss, location, - include_signature=False) + row = CollectionManifest.make_manifest_row( + ss, location, include_signature=False + ) self.manifest_rows.append(row) super().add(ss) diff --git a/src/sourmash/sbt.py b/src/sourmash/sbt.py index 3ad36ebe1f..452ca29375 100644 --- a/src/sourmash/sbt.py +++ b/src/sourmash/sbt.py @@ -26,10 +26,10 @@ from .nodegraph import Nodegraph, extract_nodegraph_info, calc_expected_collisions STORAGES = { - 'FSStorage': FSStorage, - 'IPFSStorage': IPFSStorage, - 'RedisStorage': RedisStorage, - 'ZipStorage': ZipStorage, + "FSStorage": FSStorage, + "IPFSStorage": IPFSStorage, + "RedisStorage": RedisStorage, + "ZipStorage": ZipStorage, } @@ -103,7 +103,7 @@ def popitem(self): # we just need to select the maximum key/node id (key, _) = max(c for c in common if c[1] == count) except IndexError: - msg = '%s is empty' % self.__class__.__name__ + msg = "%s is empty" % self.__class__.__name__ raise KeyError(msg) from None else: value = self.pop(key) @@ -136,6 +136,7 @@ class SBT(Index): We use two dicts to store the tree structure: One for the internal nodes, and another for the leaves (datasets). """ + is_database = True def __init__(self, factory, *, d=2, storage=None, cache_size=None): @@ -162,6 +163,7 @@ def signatures(self): # if manifest, use it & load using direct path to storage. # this will be faster when using picklists. from .signature import load_one_signature + manifest = self.manifest # iteratively select picklists; no other selection criteria @@ -191,8 +193,16 @@ def _signatures_with_internal(self): ss = k.data yield ss, k._path - def select(self, ksize=None, moltype=None, num=0, scaled=0, - containment=False, abund=None, picklist=None): + def select( + self, + ksize=None, + moltype=None, + num=0, + scaled=0, + containment=False, + abund=None, + picklist=None, + ): """Make sure this database matches the requested requirements. Will always raise ValueError if a requirement cannot be met. @@ -216,33 +226,45 @@ def select(self, ksize=None, moltype=None, num=0, scaled=0, # check ksize. if ksize is not None and db_mh.ksize != ksize: - raise ValueError(f"search ksize {ksize} is different from database ksize {db_mh.ksize}") + raise ValueError( + f"search ksize {ksize} is different from database ksize {db_mh.ksize}" + ) # check moltype. if moltype is not None and db_mh.moltype != moltype: - raise ValueError(f"search moltype {moltype} is different from database moltype {db_mh.moltype}") + raise ValueError( + f"search moltype {moltype} is different from database moltype {db_mh.moltype}" + ) # containment requires 'scaled'. if containment: if not scaled: raise ValueError("'containment' requires 'scaled' in SBT.select'") if not db_mh.scaled: - raise ValueError("cannot search this SBT for containment; signatures are not calculated with scaled") + raise ValueError( + "cannot search this SBT for containment; signatures are not calculated with scaled" + ) # 'num' and 'scaled' do not mix. 
if num: if not db_mh.num: - raise ValueError(f"this database was created with 'scaled' MinHash sketches, not 'num'") + raise ValueError( + "this database was created with 'scaled' MinHash sketches, not 'num'" + ) if num != db_mh.num: raise ValueError(f"num mismatch for SBT: num={num}, {db_mh.num}") if scaled: if not db_mh.scaled: - raise ValueError(f"this database was created with 'num' MinHash sketches, not 'scaled'") + raise ValueError( + "this database was created with 'num' MinHash sketches, not 'scaled'" + ) # we can downsample SBTs for containment operations. if scaled > db_mh.scaled and not containment: - raise ValueError(f"search scaled value {scaled} is less than database scaled value of {db_mh.scaled}") + raise ValueError( + f"search scaled value {scaled} is less than database scaled value of {db_mh.scaled}" + ) if abund: raise ValueError("SBT indices do not support sketches with abund=True") @@ -269,9 +291,13 @@ def new_node_pos(self, node): next_internal_node = None if self.next_node <= min_leaf: for i in range(min_leaf): - if all((i not in self._nodes, + if all( + ( + i not in self._nodes, i not in self._leaves, - i not in self._missing_nodes)): + i not in self._missing_nodes, + ) + ): next_internal_node = i break @@ -285,7 +311,7 @@ def new_node_pos(self, node): def insert(self, signature): "Add a new SourmashSignature in to the SBT." from .sbtmh import SigLeaf - + leaf = SigLeaf(signature.md5sum(), signature) self.add_node(leaf) @@ -315,19 +341,19 @@ def add_node(self, node): c1, c2 = self.children(p.pos)[:2] self._leaves[c1.pos] = p.node - self._leaves[c2.pos] = node + self._leaves[c2.pos] = node del self._leaves[p.pos] for child in (p.node, node): child.update(n) elif isinstance(p.node, Node): - self._leaves[pos] = node + self._leaves[pos] = node node.update(p.node) elif p.node is None: n = Node(self.factory, name="internal." + str(p.pos)) self._nodes[p.pos] = n c1 = self.children(p.pos)[0] - self._leaves[c1.pos] = node + self._leaves[c1.pos] = node node.update(n) else: # this branch should never be reached; put guard in to make sure! @@ -375,16 +401,15 @@ def _find_nodes(self, search_fn, *args, **kwargs): # apply search fn. If return false, truncate search. if search_fn(node_g, *args): - # leaf node? it's a match! if isinstance(node_g, Leaf): matches.append(node_g) # internal node? descend. elif isinstance(node_g, Node): - if kwargs.get('dfs', True): # defaults search to dfs + if kwargs.get("dfs", True): # defaults search to dfs for c in self.children(node_p): queue.insert(0, c.pos) - else: # bfs + else: # bfs queue.extend(c.pos for c in self.children(node_p)) if unload_data: @@ -423,8 +448,11 @@ def find(self, search_fn, query, **kwargs): # provide function to downsample leaf_node as well if scaled == tree_scaled: - downsample_node = lambda x: x + + def downsample_node(x): + return x else: + def downsample_node(node_mh): return node_mh.downsample(scaled=scaled) else: @@ -439,8 +467,11 @@ def downsample_node(node_mh): # provide function to downsample leaf nodes. 
if min_num == a_leaf.data.minhash.num: - downsample_node = lambda x: x + + def downsample_node(x): + return x else: + def downsample_node(node_mh): return node_mh.downsample(num=min_num) @@ -469,23 +500,22 @@ def node_search(node, *args, **kwargs): else: # Node / Nodegraph by minhash comparison # no downsampling needed -- shared_size = node.data.matches(query_mh) - subj_size = node.metadata.get('min_n_below', -1) + subj_size = node.metadata.get("min_n_below", -1) if subj_size == -1: - raise ValueError("ERROR: no min_n_below on this tree, cannot search.") - total_size = subj_size # approximate; do not collect + raise ValueError( + "ERROR: no min_n_below on this tree, cannot search." + ) + total_size = subj_size # approximate; do not collect # calculate score (exact, if leaf; approximate, if not) - score = search_fn.score_fn(query_size, - shared_size, - subj_size, - total_size) + score = search_fn.score_fn(query_size, shared_size, subj_size, total_size) if search_fn.passes(score): - if is_leaf: # terminal node? keep. + if is_leaf: # terminal node? keep. if search_fn.collect(score, node.data): results[node.data] = score return True - else: # it's a good internal node, keep. + else: # it's a good internal node, keep. return True return False @@ -514,7 +544,7 @@ def _rebuild_node(self, pos=0): # this node was already build, skip return - node = Node(self.factory, name="internal.{}".format(pos)) + node = Node(self.factory, name=f"internal.{pos}") self._nodes[pos] = node for c in self.children(pos): if c.pos in self._missing_nodes or isinstance(c.node, Leaf): @@ -614,8 +644,8 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False): full path to the new SBT description """ info = {} - info['d'] = self.d - info['version'] = 6 + info["d"] = self.d + info["version"] = 6 info["index_type"] = self.__class__.__name__ # TODO: check # choose between ZipStorage and FS (file system/directory) storage. @@ -623,22 +653,22 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False): kind = None if not path.endswith(".sbt.json"): kind = "Zip" - if not path.endswith('.sbt.zip'): - path += '.sbt.zip' + if not path.endswith(".sbt.zip"): + path += ".sbt.zip" storage = ZipStorage(path, mode="w") backend = "FSStorage" - assert path[-8:] == '.sbt.zip' + assert path[-8:] == ".sbt.zip" name = os.path.basename(path[:-8]) # align the storage prefix with what we do for FSStorage, below. - subdir = '.sbt.{}'.format(name) + subdir = f".sbt.{name}" storage_args = FSStorage("", subdir, make_dirs=False).init_args() storage.save(subdir + "/", b"") storage.subdir = subdir index_filename = os.path.abspath(path) - else: # path.endswith('.sbt.json') - assert path.endswith('.sbt.json') + else: # path.endswith('.sbt.json') + assert path.endswith(".sbt.json") name = os.path.basename(path) name = name[:-9] index_filename = os.path.abspath(path) @@ -649,7 +679,7 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False): location = os.path.dirname(index_filename) # align subdir names with what we do above for ZipStorage - subdir = '.sbt.{}'.format(name) + subdir = f".sbt.{name}" # when we go to default of FSStorage, use full location for # storage, e.g. 
location/.sbt.{name}/ @@ -659,13 +689,10 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False): backend = [k for (k, v) in STORAGES.items() if v == type(storage)][0] storage_args = storage.init_args() - info['storage'] = { - 'backend': backend, - 'args': storage_args - } - info['factory'] = { - 'class': GraphFactory.__name__, - 'args': self.factory.init_args() + info["storage"] = {"backend": backend, "args": storage_args} + info["factory"] = { + "class": GraphFactory.__name__, + "args": self.factory.init_args(), } nodes = {} @@ -685,16 +712,16 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False): data = { # TODO: start using md5sum instead? - 'filename': os.path.basename(node.name), - 'name': node.name + "filename": os.path.basename(node.name), + "name": node.name, } try: - node.metadata.pop('max_n_below') + node.metadata.pop("max_n_below") except (AttributeError, KeyError): pass - data['metadata'] = node.metadata + data["metadata"] = node.metadata if structure_only is False: # trigger data loading before saving to the new place @@ -703,27 +730,26 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False): node.storage = storage if kind == "Zip": - new_name = node.save(os.path.join(subdir, data['filename'])) - assert new_name.startswith(subdir + '/') + new_name = node.save(os.path.join(subdir, data["filename"])) + assert new_name.startswith(subdir + "/") # strip off prefix - new_name = new_name[len(subdir) + 1:] - data['filename'] = new_name + new_name = new_name[len(subdir) + 1 :] + data["filename"] = new_name else: - data['filename'] = node.save(data['filename']) - + data["filename"] = node.save(data["filename"]) if isinstance(node, Node): nodes[i] = data else: leaves[i] = data - row = node.make_manifest_row(data['filename']) + row = node.make_manifest_row(data["filename"]) if row: manifest_rows.append(row) if n % 100 == 0: - notify(f"{format(n+1)} of {format(total_nodes)} nodes saved", end='\r') + notify(f"{format(n+1)} of {format(total_nodes)} nodes saved", end="\r") # now, save the index file and manifests. # @@ -736,8 +762,8 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False): # (CTB: manifests are not yet supported for Redis and IPFS) # notify("Finished saving nodes, now saving SBT index file.") - info['nodes'] = nodes - info['signatures'] = leaves + info["nodes"] = nodes + info["signatures"] = leaves # finish constructing manifest object & save manifest = CollectionManifest(manifest_rows) @@ -749,39 +775,46 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False): if kind == "Zip": manifest_name = os.path.join(storage.subdir, manifest_name) - manifest_path = storage.save(manifest_name, manifest_data, - overwrite=True, compress=True) + manifest_path = storage.save( + manifest_name, manifest_data, overwrite=True, compress=True + ) elif kind == "FS": manifest_name = manifest_name - manifest_path = storage.save(manifest_name, manifest_data, - overwrite=True) + manifest_path = storage.save(manifest_name, manifest_data, overwrite=True) else: manifest_path = None if manifest_path: - info['manifest_path'] = manifest_path + info["manifest_path"] = manifest_path # now, save index. tree_data = json.dumps(info).encode("utf-8") if kind == "Zip": - save_path = "{}.sbt.json".format(name) + save_path = f"{name}.sbt.json" storage.save(save_path, tree_data, overwrite=True) storage.flush() elif kind == "FS": storage.save(index_filename, tree_data, overwrite=True) else: # save tree locally. 
- with open(index_filename, 'wb') as tree_fp:
+ with open(index_filename, "wb") as tree_fp:
 tree_fp.write(tree_data)
 notify(f"Finished saving SBT index, available at {format(index_filename)}\n")
 return path
 @classmethod
- def load(cls, location, *, leaf_loader=None, storage=None, print_version_warning=True, cache_size=None):
+ def load(
+ cls,
+ location,
+ *,
+ leaf_loader=None,
+ storage=None,
+ print_version_warning=True,
+ cache_size=None,
+ ):
 """Load an SBT description from a file.
 Parameters
@@ -807,8 +840,8 @@ def load(cls, location, *, leaf_loader=None, storage=None, print_version_warning
 if ZipStorage.can_open(location):
 storage = ZipStorage(location)
 else:
- if not location.endswith('.sbt.zip'):
- location2 = location + '.sbt.zip'
+ if not location.endswith(".sbt.zip"):
+ location2 = location + ".sbt.zip"
 if ZipStorage.can_open(location2):
 storage = ZipStorage(location2)
@@ -828,12 +861,12 @@ def load(cls, location, *, leaf_loader=None, storage=None, print_version_warning
 if sbt_name is None:
 dirname = os.path.dirname(os.path.abspath(location))
 sbt_name = os.path.basename(location)
- if sbt_name.endswith('.sbt.json'):
+ if sbt_name.endswith(".sbt.json"):
 sbt_name = sbt_name[:-9]
 sbt_fn = os.path.join(dirname, sbt_name)
- if not sbt_fn.endswith('.sbt.json') and tempfile is None:
- sbt_fn += '.sbt.json'
+ if not sbt_fn.endswith(".sbt.json") and tempfile is None:
+ sbt_fn += ".sbt.json"
 try:
 with open(sbt_fn) as fp:
@@ -846,7 +879,7 @@ def load(cls, location, *, leaf_loader=None, storage=None, print_version_warning
 version = 1
 if isinstance(jnodes, Mapping):
- version = jnodes['version']
+ version = jnodes["version"]
 if leaf_loader is None:
 leaf_loader = Leaf.load
@@ -865,26 +898,33 @@ def load(cls, location, *, leaf_loader=None, storage=None, print_version_warning
 except KeyError:
 raise IndexNotSupported()
- #if version >= 6:
+ # if version >= 6:
 # if jnodes.get("index_type", "SBT") == "LocalizedSBT":
 # loaders[6] = LocalizedSBT._load_v6
 if version < 3 and storage is None:
- storage = FSStorage(dirname, '.sbt.{}'.format(sbt_name))
+ storage = FSStorage(dirname, f".sbt.{sbt_name}")
 elif storage is None:
- klass = STORAGES[jnodes['storage']['backend']]
- if jnodes['storage']['backend'] == "FSStorage":
- storage = FSStorage(dirname, jnodes['storage']['args']['path'])
+ klass = STORAGES[jnodes["storage"]["backend"]]
+ if jnodes["storage"]["backend"] == "FSStorage":
+ storage = FSStorage(dirname, jnodes["storage"]["args"]["path"])
 elif storage is None:
- storage = klass(**jnodes['storage']['args'])
-
- obj = loader(jnodes, leaf_loader, dirname, storage, print_version_warning=print_version_warning, cache_size=cache_size)
+ storage = klass(**jnodes["storage"]["args"])
+
+ obj = loader(
+ jnodes,
+ leaf_loader,
+ dirname,
+ storage,
+ print_version_warning=print_version_warning,
+ cache_size=cache_size,
+ )
 obj._location = location
- if 'manifest_path' in jnodes:
- manifest_path = jnodes['manifest_path']
+ if "manifest_path" in jnodes:
+ manifest_path = jnodes["manifest_path"]
 manifest_data = storage.load(manifest_path)
- manifest_data = manifest_data.decode('utf-8')
+ manifest_data = manifest_data.decode("utf-8")
 manifest_fp = StringIO(manifest_data)
 obj.manifest = CollectionManifest.load_from_csv(manifest_fp)
 else:
@@ -893,15 +933,22 @@ def load(cls, location, *, leaf_loader=None, storage=None, print_version_warning
 return obj
 @staticmethod
- def _load_v1(jnodes, leaf_loader, dirname, storage, *, print_version_warning=True, cache_size=None):
-
+ def _load_v1(
+ jnodes,
+ leaf_loader,
+ dirname, + storage, + *, + print_version_warning=True, + cache_size=None, + ): if jnodes[0] is None: raise ValueError("Empty tree!") sbt_nodes = {} sbt_leaves = {} - sample_bf = os.path.join(dirname, jnodes[0]['filename']) + sample_bf = os.path.join(dirname, jnodes[0]["filename"]) ksize, tablesize, ntables = extract_nodegraph_info(sample_bf)[:3] factory = GraphFactory(ksize, tablesize, ntables) @@ -909,10 +956,10 @@ def _load_v1(jnodes, leaf_loader, dirname, storage, *, print_version_warning=Tru if jnode is None: continue - jnode['filename'] = os.path.join(dirname, jnode['filename']) + jnode["filename"] = os.path.join(dirname, jnode["filename"]) - if 'internal' in jnode['name']: - jnode['factory'] = factory + if "internal" in jnode["name"]: + jnode["factory"] = factory sbt_node = Node.load(jnode, storage) sbt_nodes[i] = sbt_node else: @@ -926,8 +973,17 @@ def _load_v1(jnodes, leaf_loader, dirname, storage, *, print_version_warning=Tru return tree @classmethod - def _load_v2(cls, info, leaf_loader, dirname, storage, *, print_version_warning=True, cache_size=None): - nodes = {int(k): v for (k, v) in info['nodes'].items()} + def _load_v2( + cls, + info, + leaf_loader, + dirname, + storage, + *, + print_version_warning=True, + cache_size=None, + ): + nodes = {int(k): v for (k, v) in info["nodes"].items()} if nodes[0] is None: raise ValueError("Empty tree!") @@ -935,7 +991,7 @@ def _load_v2(cls, info, leaf_loader, dirname, storage, *, print_version_warning= sbt_nodes = {} sbt_leaves = {} - sample_bf = os.path.join(dirname, nodes[0]['filename']) + sample_bf = os.path.join(dirname, nodes[0]["filename"]) k, size, ntables = extract_nodegraph_info(sample_bf)[:3] factory = GraphFactory(k, size, ntables) @@ -943,25 +999,34 @@ def _load_v2(cls, info, leaf_loader, dirname, storage, *, print_version_warning= if node is None: continue - node['filename'] = os.path.join(dirname, node['filename']) + node["filename"] = os.path.join(dirname, node["filename"]) - if 'internal' in node['name']: - node['factory'] = factory + if "internal" in node["name"]: + node["factory"] = factory sbt_node = Node.load(node, storage) sbt_nodes[k] = sbt_node else: sbt_node = leaf_loader(node, storage) sbt_leaves[k] = sbt_node - tree = cls(factory, d=info['d'], cache_size=cache_size) + tree = cls(factory, d=info["d"], cache_size=cache_size) tree._nodes = sbt_nodes tree._leaves = sbt_leaves return tree @classmethod - def _load_v3(cls, info, leaf_loader, dirname, storage, *, print_version_warning=True, cache_size=None): - nodes = {int(k): v for (k, v) in info['nodes'].items()} + def _load_v3( + cls, + info, + leaf_loader, + dirname, + storage, + *, + print_version_warning=True, + cache_size=None, + ): + nodes = {int(k): v for (k, v) in info["nodes"].items()} if not nodes: raise ValueError("Empty tree!") @@ -969,15 +1034,15 @@ def _load_v3(cls, info, leaf_loader, dirname, storage, *, print_version_warning= sbt_nodes = {} sbt_leaves = {} - factory = GraphFactory(*info['factory']['args']) + factory = GraphFactory(*info["factory"]["args"]) max_node = 0 for k, node in nodes.items(): if node is None: continue - if 'internal' in node['name']: - node['factory'] = factory + if "internal" in node["name"]: + node["factory"] = factory sbt_node = Node.load(node, storage) sbt_nodes[k] = sbt_node else: @@ -986,23 +1051,37 @@ def _load_v3(cls, info, leaf_loader, dirname, storage, *, print_version_warning= max_node = max(max_node, k) - tree = cls(factory, d=info['d'], storage=storage, cache_size=cache_size) + tree = cls(factory, d=info["d"], 
storage=storage, cache_size=cache_size) tree._nodes = sbt_nodes tree._leaves = sbt_leaves - tree._missing_nodes = {i for i in range(max_node) - if i not in sbt_nodes and i not in sbt_leaves} + tree._missing_nodes = { + i for i in range(max_node) if i not in sbt_nodes and i not in sbt_leaves + } if print_version_warning: - error("WARNING: this is an old index version, please run `sourmash migrate` to update it.") - error("WARNING: proceeding with execution, but it will take longer to finish!") + error( + "WARNING: this is an old index version, please run `sourmash migrate` to update it." + ) + error( + "WARNING: proceeding with execution, but it will take longer to finish!" + ) tree._fill_min_n_below() return tree @classmethod - def _load_v4(cls, info, leaf_loader, dirname, storage, *, print_version_warning=True, cache_size=None): - nodes = {int(k): v for (k, v) in info['nodes'].items()} + def _load_v4( + cls, + info, + leaf_loader, + dirname, + storage, + *, + print_version_warning=True, + cache_size=None, + ): + nodes = {int(k): v for (k, v) in info["nodes"].items()} if not nodes: raise ValueError("Empty tree!") @@ -1010,12 +1089,12 @@ def _load_v4(cls, info, leaf_loader, dirname, storage, *, print_version_warning= sbt_nodes = {} sbt_leaves = {} - factory = GraphFactory(*info['factory']['args']) + factory = GraphFactory(*info["factory"]["args"]) max_node = 0 for k, node in nodes.items(): - if 'internal' in node['name']: - node['factory'] = factory + if "internal" in node["name"]: + node["factory"] = factory sbt_node = Node.load(node, storage) sbt_nodes[k] = sbt_node else: @@ -1024,20 +1103,30 @@ def _load_v4(cls, info, leaf_loader, dirname, storage, *, print_version_warning= max_node = max(max_node, k) - tree = cls(factory, d=info['d'], storage=storage, cache_size=cache_size) + tree = cls(factory, d=info["d"], storage=storage, cache_size=cache_size) tree._nodes = sbt_nodes tree._leaves = sbt_leaves - tree._missing_nodes = {i for i in range(max_node) - if i not in sbt_nodes and i not in sbt_leaves} + tree._missing_nodes = { + i for i in range(max_node) if i not in sbt_nodes and i not in sbt_leaves + } tree.next_node = max_node return tree @classmethod - def _load_v5(cls, info, leaf_loader, dirname, storage, *, print_version_warning=True, cache_size=None): - nodes = {int(k): v for (k, v) in info['nodes'].items()} - leaves = {int(k): v for (k, v) in info['leaves'].items()} + def _load_v5( + cls, + info, + leaf_loader, + dirname, + storage, + *, + print_version_warning=True, + cache_size=None, + ): + nodes = {int(k): v for (k, v) in info["nodes"].items()} + leaves = {int(k): v for (k, v) in info["leaves"].items()} if not leaves: raise ValueError("Empty tree!") @@ -1046,17 +1135,17 @@ def _load_v5(cls, info, leaf_loader, dirname, storage, *, print_version_warning= sbt_leaves = {} if storage is None: - klass = STORAGES[info['storage']['backend']] - if info['storage']['backend'] == "FSStorage": - storage = FSStorage(dirname, info['storage']['args']['path']) + klass = STORAGES[info["storage"]["backend"]] + if info["storage"]["backend"] == "FSStorage": + storage = FSStorage(dirname, info["storage"]["args"]["path"]) elif storage is None: - storage = klass(**info['storage']['args']) + storage = klass(**info["storage"]["args"]) - factory = GraphFactory(*info['factory']['args']) + factory = GraphFactory(*info["factory"]["args"]) max_node = 0 for k, node in nodes.items(): - node['factory'] = factory + node["factory"] = factory sbt_node = Node.load(node, storage) sbt_nodes[k] = sbt_node @@ -1067,18 
+1156,28 @@ def _load_v5(cls, info, leaf_loader, dirname, storage, *, print_version_warning= sbt_leaves[k] = sbt_leaf max_node = max(max_node, k) - tree = cls(factory, d=info['d'], storage=storage, cache_size=cache_size) + tree = cls(factory, d=info["d"], storage=storage, cache_size=cache_size) tree._nodes = sbt_nodes tree._leaves = sbt_leaves - tree._missing_nodes = {i for i in range(max_node) - if i not in sbt_nodes and i not in sbt_leaves} + tree._missing_nodes = { + i for i in range(max_node) if i not in sbt_nodes and i not in sbt_leaves + } return tree @classmethod - def _load_v6(cls, info, leaf_loader, dirname, storage, *, print_version_warning=True, cache_size=None): - nodes = {int(k): v for (k, v) in info['nodes'].items()} - leaves = {int(k): v for (k, v) in info['signatures'].items()} + def _load_v6( + cls, + info, + leaf_loader, + dirname, + storage, + *, + print_version_warning=True, + cache_size=None, + ): + nodes = {int(k): v for (k, v) in info["nodes"].items()} + leaves = {int(k): v for (k, v) in info["signatures"].items()} if not leaves: raise ValueError("Empty tree!") @@ -1087,17 +1186,17 @@ def _load_v6(cls, info, leaf_loader, dirname, storage, *, print_version_warning= sbt_leaves = {} if storage is None: - klass = STORAGES[info['storage']['backend']] - if info['storage']['backend'] == "FSStorage": - storage = FSStorage(dirname, info['storage']['args']['path']) + klass = STORAGES[info["storage"]["backend"]] + if info["storage"]["backend"] == "FSStorage": + storage = FSStorage(dirname, info["storage"]["args"]["path"]) elif storage is None: - storage = klass(**info['storage']['args']) + storage = klass(**info["storage"]["args"]) - factory = GraphFactory(*info['factory']['args']) + factory = GraphFactory(*info["factory"]["args"]) max_node = 0 for k, node in nodes.items(): - node['factory'] = factory + node["factory"] = factory sbt_node = Node.load(node, storage) sbt_nodes[k] = sbt_node @@ -1108,11 +1207,12 @@ def _load_v6(cls, info, leaf_loader, dirname, storage, *, print_version_warning= sbt_leaves[k] = sbt_leaf max_node = max(max_node, k) - tree = cls(factory, d=info['d'], storage=storage, cache_size=cache_size) + tree = cls(factory, d=info["d"], storage=storage, cache_size=cache_size) tree._nodes = sbt_nodes tree._leaves = sbt_leaves - tree._missing_nodes = {i for i in range(max_node) - if i not in sbt_nodes and i not in sbt_leaves} + tree._missing_nodes = { + i for i in range(max_node) if i not in sbt_nodes and i not in sbt_leaves + } return tree @@ -1121,31 +1221,31 @@ def _fill_min_n_below(self): Propagate the smallest hash size below each node up the tree from the leaves. 
""" + def fill_min_n_below(node, *args, **kwargs): - original_min_n_below = node.metadata.get('min_n_below', sys.maxsize) + original_min_n_below = node.metadata.get("min_n_below", sys.maxsize) min_n_below = original_min_n_below - children = kwargs['children'] + children = kwargs["children"] for child in children: if child.node is not None: if isinstance(child.node, Leaf): min_n_below = min(len(child.node.data.minhash), min_n_below) else: - child_n = child.node.metadata.get('min_n_below', sys.maxsize) + child_n = child.node.metadata.get("min_n_below", sys.maxsize) min_n_below = min(child_n, min_n_below) if min_n_below == 0: min_n_below = 1 - node.metadata['min_n_below'] = min_n_below + node.metadata["min_n_below"] = min_n_below return original_min_n_below != min_n_below self._fill_up(fill_min_n_below) def _fill_internal(self): - def fill_nodegraphs(node, *args, **kwargs): - children = kwargs['children'] + children = kwargs["children"] for child in children: if child.node is not None: child.node.update(node) @@ -1191,28 +1291,29 @@ def _fill_up(self, search_fn, *args, **kwargs): processed += 1 if processed % 100 == 0: - debug("processed {}, in queue {}", processed, len(queue), sep='\r') + debug("processed {}, in queue {}", processed, len(queue), sep="\r") def __len__(self): return len(self._leaves) def print_dot(self): - print(""" + print( + """ digraph G { nodesep=0.3; ranksep=0.2; margin=0.1; node [shape=ellipse]; edge [arrowsize=0.8]; - """) + """ + ) for i, node in self._nodes.items(): if isinstance(node, Node): - print('"{}" [shape=box fillcolor=gray style=filled]'.format( - node.name)) + print(f'"{node.name}" [shape=box fillcolor=gray style=filled]') for j, child in self.children(i): if child is not None: - print('"{}" -> "{}"'.format(node.name, child.name)) + print(f'"{node.name}" -> "{child.name}"') print("}") def print(self): @@ -1225,8 +1326,9 @@ def print(self): depth = int(math.floor(math.log(node_p + 1, self.d))) print(" " * 4 * depth, node_g) if isinstance(node_g, Node): - stack.extend(c.pos for c in self.children(node_p) - if c.pos not in visited) + stack.extend( + c.pos for c in self.children(node_p) if c.pos not in visited + ) def __iter__(self): for i, node in self._nodes.items(): @@ -1274,14 +1376,14 @@ def combine(self, other): for pos in range(n_previous, n_next): if tree._nodes.get(pos, None) is not None: new_node = copy(tree._nodes[pos]) - new_node.name = "internal.{}".format(current_pos) + new_node.name = f"internal.{current_pos}" new_nodes[current_pos] = new_node elif tree._leaves.get(pos, None) is not None: new_node = copy(tree._leaves[pos]) new_leaves[current_pos] = new_node current_pos += 1 n_previous = n_next - n_next = n_previous + int(self.d ** level) + n_next = n_previous + int(self.d**level) current_pos = n_next # TODO: do we want to return a new tree, or merge into this one? 
@@ -1299,12 +1401,14 @@ def __init__(self, factory, name=None, path=None, storage=None): self._factory = factory self._data = None self._path = path - self.metadata = dict() + self.metadata = {} def __str__(self): - return '*Node:{name} [occupied: {nb}, fpr: {fpr:.2}]'.format( - name=self.name, nb=self.data.n_occupied(), - fpr=calc_expected_collisions(self.data, True, 1.1)) + return "*Node:{name} [occupied: {nb}, fpr: {fpr:.2}]".format( + name=self.name, + nb=self.data.n_occupied(), + fpr=calc_expected_collisions(self.data, True, 1.1), + ) def save(self, path): buf = self.data.to_bytes(compression=1) @@ -1332,21 +1436,22 @@ def unload(self): @staticmethod def load(info, storage=None): - new_node = Node(info['factory'], - name=info['name'], - path=info['filename'], - storage=storage) - new_node.metadata = info.get('metadata', {}) + new_node = Node( + info["factory"], name=info["name"], path=info["filename"], storage=storage + ) + new_node.metadata = info.get("metadata", {}) return new_node def update(self, parent): parent.data.update(self.data) - if 'min_n_below' in self.metadata: - min_n_below = min(parent.metadata.get('min_n_below', sys.maxsize), - self.metadata.get('min_n_below')) + if "min_n_below" in self.metadata: + min_n_below = min( + parent.metadata.get("min_n_below", sys.maxsize), + self.metadata.get("min_n_below"), + ) if min_n_below == 0: min_n_below = 1 - parent.metadata['min_n_below'] = min_n_below + parent.metadata["min_n_below"] = min_n_below class Leaf: @@ -1363,10 +1468,12 @@ def __init__(self, metadata, data=None, name=None, storage=None, path=None): self._path = path def __str__(self): - return '**Leaf:{name} [occupied: {nb}, fpr: {fpr:.2}] -> {metadata}'.format( - name=self.name, metadata=self.metadata, - nb=self.data.n_occupied(), - fpr=calc_expected_collisions(self.data, True, 1.1)) + return "**Leaf:{name} [occupied: {nb}, fpr: {fpr:.2}] -> {metadata}".format( + name=self.name, + metadata=self.metadata, + nb=self.data.n_occupied(), + fpr=calc_expected_collisions(self.data, True, 1.1), + ) def make_manifest_row(self, location): return None @@ -1397,10 +1504,9 @@ def update(self, parent): @classmethod def load(cls, info, storage=None): - return cls(info['metadata'], - name=info['name'], - path=info['filename'], - storage=storage) + return cls( + info["metadata"], name=info["name"], path=info["filename"], storage=storage + ) def filter_distance(filter_a, filter_b, n=1000): @@ -1428,9 +1534,15 @@ def filter_distance(filter_a, filter_b, n=1000): a = array(q, copy=False) b = array(p, copy=False) for i in map(lambda x: randint(0, len(a)), range(n)): - distance += sum(map(int, - [not bool((a[i] >> j) & 1) ^ bool((b[i] >> j) & 1) - for j in range(8)])) + distance += sum( + map( + int, + [ + not bool((a[i] >> j) & 1) ^ bool((b[i] >> j) & 1) + for j in range(8) + ], + ) + ) return distance / (8.0 * len(A) * n) @@ -1438,41 +1550,41 @@ def convert_cmd(name, backend): "Convert an SBT to use a different back end." 
from .sbtmh import SigLeaf - options = backend.split('(') + options = backend.split("(") backend = options.pop(0) backend = backend.lower().strip("'") kwargs = {} if options: - print(options) - options = options[0].split(')') - options = [options.pop(0)] - #options = {} + print(options) + options = options[0].split(")") + options = [options.pop(0)] + # options = {} else: - options = [] + options = [] - if backend.lower() in ('ipfs', 'ipfsstorage'): + if backend.lower() in ("ipfs", "ipfsstorage"): backend = IPFSStorage - elif backend.lower() in ('redis', 'redisstorage'): + elif backend.lower() in ("redis", "redisstorage"): backend = RedisStorage - elif backend.lower() in ('zip', 'zipstorage'): + elif backend.lower() in ("zip", "zipstorage"): backend = ZipStorage - kwargs['mode'] = 'w' - elif backend.lower() in ('fs', 'fsstorage'): + kwargs["mode"] = "w" + elif backend.lower() in ("fs", "fsstorage"): backend = FSStorage if options: options = [os.path.dirname(options[0]), os.path.basename(options[0])] else: # this is the default for SBT v2 - tag = '.sbt.' + os.path.basename(name) - if tag.endswith('.sbt.json'): + tag = ".sbt." + os.path.basename(name) + if tag.endswith(".sbt.json"): tag = tag[:-9] path = os.path.dirname(name) options = [path, tag] else: - error('backend not recognized: {}'.format(backend)) + error(f"backend not recognized: {backend}") with backend(*options, **kwargs) as storage: sbt = SBT.load(name, leaf_loader=SigLeaf.load) diff --git a/src/sourmash/sbt_storage.py b/src/sourmash/sbt_storage.py index 42a4fceaa6..1b7a9e7d78 100644 --- a/src/sourmash/sbt_storage.py +++ b/src/sourmash/sbt_storage.py @@ -15,7 +15,6 @@ class Storage(ABC): - @abc.abstractmethod def save(self, path, content, *, overwrite=False): pass @@ -44,7 +43,6 @@ def can_open(self, location): class FSStorage(Storage): - def __init__(self, location, subdir, make_dirs=True): self.location = location self.subdir = subdir @@ -55,7 +53,7 @@ def __init__(self, location, subdir, make_dirs=True): os.makedirs(fullpath) def init_args(self): - return {'path': self.subdir} + return {"path": self.subdir} def save(self, path, content, overwrite=False): "Save a node/leaf." @@ -64,27 +62,27 @@ def save(self, path, content, overwrite=False): if os.path.exists(fullpath): # check for content, if same return path, - with open(fullpath, 'rb') as f: + with open(fullpath, "rb") as f: old_content = f.read() if old_content == content: return path if overwrite: - pass # fine to overwrite file! + pass # fine to overwrite file! 
else: # different content, need to find new path to save newpath = None n = 0 while newpath is None: - testpath = "{}_{}".format(fullpath, n) + testpath = f"{fullpath}_{n}" if os.path.exists(testpath): n += 1 else: # testpath is available, use it as newpath - newpath = "{}_{}".format(path, n) + newpath = f"{path}_{n}" fullpath = os.path.join(self.location, self.subdir, newpath) - with open(fullpath, 'wb') as f: + with open(fullpath, "wb") as f: f.write(content) return newpath @@ -95,7 +93,6 @@ def load(self, path): class ZipStorage(RustObject, Storage): - __dealloc_func__ = lib.zipstorage_free def __init__(self, path, *, mode="r"): @@ -146,7 +143,9 @@ def _filenames(self): def save(self, path, content, *, overwrite=False, compress=False): if self.__inner: - return self.__inner.save(path, content, overwrite=overwrite, compress=compress) + return self.__inner.save( + path, content, overwrite=overwrite, compress=compress + ) raise NotImplementedError() def load(self, path): @@ -155,7 +154,9 @@ def load(self, path): try: size = ffi.new("uintptr_t *") - rawbuf = self._methodcall(lib.zipstorage_load, to_bytes(path), len(path), size) + rawbuf = self._methodcall( + lib.zipstorage_load, to_bytes(path), len(path), size + ) size = size[0] rawbuf = ffi.gc(rawbuf, lambda o: lib.nodegraph_buffer_free(o, size), size) @@ -182,7 +183,7 @@ def list_sbts(self): return paths def init_args(self): - return {'path': self.path} + return {"path": self.path} def flush(self): if self.__inner: @@ -198,7 +199,6 @@ def can_open(location): class _RwZipStorage(Storage): - def __init__(self, path): self.path = os.path.abspath(path) @@ -212,14 +212,15 @@ def __init__(self, path): # so we need to check some things: if not os.path.exists(self.path): # If the file doesn't exist open it in write mode. - self.zipfile = zipfile.ZipFile(path, mode='w', - compression=zipfile.ZIP_STORED) + self.zipfile = zipfile.ZipFile( + path, mode="w", compression=zipfile.ZIP_STORED + ) else: # If it exists, open it in read mode and prepare a buffer for # new/duplicated items. During close() there are checks to see # how the original file needs to be updated (append new items, # deal with duplicates, and so on) - self.zipfile = zipfile.ZipFile(path, 'r') + self.zipfile = zipfile.ZipFile(path, "r") self.bufferzip = zipfile.ZipFile(BytesIO(), mode="w") self.subdir = "" @@ -250,7 +251,7 @@ def _generate_filename(self, zf, path, content): newpath = None n = 0 while newpath is None: - testpath = "{}_{}".format(path, n) + testpath = f"{path}_{n}" try: matches = self._content_matches(zf, testpath, content) if matches: @@ -260,7 +261,7 @@ def _generate_filename(self, zf, path, content): except KeyError: return testpath, True - assert 0 # should never get here! + assert 0 # should never get here! 
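Both `FSStorage.save` and `_RwZipStorage._generate_filename` above implement the same rename-on-collision scheme: identical content reuses the existing path, while different content under the same name gets shifted to a `_0`, `_1`, ... suffix, and the path actually used is returned to the caller. A small usage sketch, assuming the module layout in this diff (`.sbt.demo` and `node.bin` are arbitrary illustrative names):

```python
import tempfile

from sourmash.sbt_storage import FSStorage

with tempfile.TemporaryDirectory() as loc:
    storage = FSStorage(loc, ".sbt.demo")   # creates loc/.sbt.demo/
    p1 = storage.save("node.bin", b"AAAA")  # fresh path
    p2 = storage.save("node.bin", b"AAAA")  # same bytes: path reused
    p3 = storage.save("node.bin", b"BBBB")  # new bytes: renamed copy
    assert p1 == p2 == "node.bin"
    assert p3 == "node.bin_0"
```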
def _write_to_zf(self, zf, path, content, *, compress=False): compress_type = zipfile.ZIP_STORED @@ -272,9 +273,9 @@ def _write_to_zf(self, zf, path, content, *, compress=False): # set permissions zi = zf.getinfo(path) - perms = 0o444 << 16 # give a+r access - if path.endswith('/'): - perms = 0o755 << 16 # directories get u+rwx, a+rx + perms = 0o444 << 16 # give a+r access + if path.endswith("/"): + perms = 0o755 << 16 # directories get u+rwx, a+rx zi.external_attr = perms def save(self, path, content, *, overwrite=False, compress=False): @@ -287,15 +288,15 @@ def save(self, path, content, *, overwrite=False, compress=False): newpath, do_write = self._generate_filename(self.zipfile, path, content) if do_write: try: - self._write_to_zf(self.zipfile, newpath, content, - compress=compress) + self._write_to_zf(self.zipfile, newpath, content, compress=compress) except (ValueError, RuntimeError): # Can't write in the zipfile, write in buffer instead # CTB: do we need to generate a new filename wrt to the # bufferzip, too? Not sure this code is working as intended... if self.bufferzip: - self._write_to_zf(self.bufferzip, newpath, content, - compress=compress) + self._write_to_zf( + self.bufferzip, newpath, content, compress=compress + ) else: # Throw error, can't write the data raise ValueError("can't write data") @@ -326,7 +327,7 @@ def close(self): # might not have self.zipfile if was invalid zipfile and __init__ # failed. - if hasattr(self, 'zipfile'): + if hasattr(self, "zipfile"): if self.zipfile is not None or self.bufferzip is not None: self.flush(keep_closed=True) self.zipfile.close() @@ -341,8 +342,9 @@ def flush(self, *, keep_closed=False): if self.zipfile is not None: self.zipfile.close() if not keep_closed: - self.zipfile = zipfile.ZipFile(self.path, mode='a', - compression=zipfile.ZIP_STORED) + self.zipfile = zipfile.ZipFile( + self.path, mode="a", compression=zipfile.ZIP_STORED + ) else: # The complicated one. Need to consider: # - Is there data in the buffer? 
@@ -367,7 +369,9 @@ def flush(self, *, keep_closed=False): if item in duplicated or item in buffer_names: # we prioritize writing data from the buffer to the # final file - self._write_to_zf(final_file, item, self.bufferzip.read(item)) + self._write_to_zf( + final_file, item, self.bufferzip.read(item) + ) else: # it is only in the zipfile, so write from it self._write_to_zf(final_file, item, self.zipfile.read(item)) @@ -379,8 +383,9 @@ def flush(self, *, keep_closed=False): os.unlink(self.path) shutil.move(tempfile.name, self.path) if not keep_closed: - self.zipfile = zipfile.ZipFile(self.path, mode='a', - compression=zipfile.ZIP_STORED) + self.zipfile = zipfile.ZipFile( + self.path, mode="a", compression=zipfile.ZIP_STORED + ) elif new_data: # Since there is no duplicated data, we can # reopen self.zipfile in append mode and write the new data @@ -388,8 +393,9 @@ def flush(self, *, keep_closed=False): if keep_closed: raise Exception("unexpected error") else: - zf = zipfile.ZipFile(self.path, mode='a', - compression=zipfile.ZIP_STORED) + zf = zipfile.ZipFile( + self.path, mode="a", compression=zipfile.ZIP_STORED + ) for item in new_data: self._write_to_zf(zf, item, self.bufferzip.read(item)) self.zipfile = zf @@ -405,9 +411,9 @@ def __del__(self): class IPFSStorage(Storage): - def __init__(self, pin_on_add=True, **kwargs): import ipfshttpclient + self.ipfs_args = kwargs self.pin_on_add = pin_on_add self.api = ipfshttpclient.connect(**self.ipfs_args) @@ -444,9 +450,9 @@ def __exit__(self, type, value, traceback): class RedisStorage(Storage): - def __init__(self, **kwargs): import redis + self.redis_args = kwargs self.conn = redis.Redis(**self.redis_args) diff --git a/src/sourmash/sbtmh.py b/src/sourmash/sbtmh.py index 6cb9cc0135..3fa7aa23f2 100644 --- a/src/sourmash/sbtmh.py +++ b/src/sourmash/sbtmh.py @@ -7,9 +7,12 @@ def load_sbt_index(filename, *, print_version_warning=True, cache_size=None): "Load and return an SBT index." - return SBT.load(filename, leaf_loader=SigLeaf.load, - print_version_warning=print_version_warning, - cache_size=cache_size) + return SBT.load( + filename, + leaf_loader=SigLeaf.load, + print_version_warning=print_version_warning, + cache_size=cache_size, + ) def create_sbt_index(bloom_filter_size=1e5, n_children=2): @@ -29,21 +32,18 @@ def search_sbt_index(tree, query, threshold): for match_sig, similarity in search_sbt_index(tree, query, threshold): ... 
""" - for (score, match, _) in tree.search(query, threshold=threshold, - unload_data=True): + for score, match, _ in tree.search(query, threshold=threshold, unload_data=True): yield match, score class SigLeaf(Leaf): def __str__(self): - return '**Leaf:{name} -> {metadata}'.format( - name=self.name, metadata=self.metadata) + return f"**Leaf:{self.name} -> {self.metadata}" def make_manifest_row(self, loc): from .index import CollectionManifest - row = CollectionManifest.make_manifest_row(self.data, - loc, - include_signature=0) + + row = CollectionManifest.make_manifest_row(self.data, loc, include_signature=0) return row def save(self, path): @@ -58,13 +58,13 @@ def save(self, path): def update(self, parent): mh = self.data.minhash parent.data.update(mh) - min_n_below = parent.metadata.get('min_n_below', sys.maxsize) + min_n_below = parent.metadata.get("min_n_below", sys.maxsize) min_n_below = min(len(mh), min_n_below) if min_n_below == 0: min_n_below = 1 - parent.metadata['min_n_below'] = min_n_below + parent.metadata["min_n_below"] = min_n_below @property def data(self): diff --git a/src/sourmash/search.py b/src/sourmash/search.py index 7b2db8008f..f730d1daf5 100644 --- a/src/sourmash/search.py +++ b/src/sourmash/search.py @@ -4,10 +4,10 @@ import csv import numpy as np from enum import Enum -import numpy as np from dataclasses import dataclass -from .signature import SourmashSignature, MinHash +from .minhash import MinHash +from .signature import SourmashSignature from .sketchcomparison import FracMinHashComparison, NumMinHashComparison @@ -42,11 +42,9 @@ class SearchType(Enum): MAX_CONTAINMENT = 3 -def make_jaccard_search_query(*, - do_containment=False, - do_max_containment=False, - best_only=False, - threshold=None): +def make_jaccard_search_query( + *, do_containment=False, do_max_containment=False, best_only=False, threshold=None +): """\ Make a "flat" search object for Jaccard search & containment. """ @@ -81,11 +79,9 @@ def make_containment_query(query_mh, threshold_bp, *, best_only=True): threshold, _ = calc_threshold_from_bp(threshold_bp, scaled, len(query_mh)) if best_only: - search_obj = JaccardSearchBestOnly(SearchType.CONTAINMENT, - threshold=threshold) + search_obj = JaccardSearchBestOnly(SearchType.CONTAINMENT, threshold=threshold) else: - search_obj = JaccardSearch(SearchType.CONTAINMENT, - threshold=threshold) + search_obj = JaccardSearch(SearchType.CONTAINMENT, threshold=threshold) return search_obj @@ -94,6 +90,7 @@ class JaccardSearch: """ A class used by Index classes for searching/gathering. """ + def __init__(self, search_type, threshold=None): "Constructor. Takes type of search, and optional threshold." score_fn = None @@ -148,15 +145,13 @@ def score_jaccard(self, query_size, shared_size, subject_size, total_size): return 0 return shared_size / total_size - def score_containment(self, query_size, shared_size, subject_size, - total_size): + def score_containment(self, query_size, shared_size, subject_size, total_size): "Calculate Jaccard containment." if query_size == 0: return 0 return shared_size / query_size - def score_max_containment(self, query_size, shared_size, subject_size, - total_size): + def score_max_containment(self, query_size, shared_size, subject_size, total_size): "Calculate Jaccard max containment." 
min_denom = min(query_size, subject_size) if min_denom == 0: @@ -166,11 +161,13 @@ def score_max_containment(self, query_size, shared_size, subject_size, class JaccardSearchBestOnly(JaccardSearch): "A subclass of JaccardSearch that implements best-only." + def collect(self, score, match): "Raise the threshold to the best match found so far." self.threshold = max(self.threshold, score) return True + @dataclass class BaseResult: """ @@ -179,10 +176,11 @@ class BaseResult: properly initialize a SketchComparison, this class doesn't actually do anything other than define some functions needed by *Result classes. """ + query: SourmashSignature match: SourmashSignature filename: str = None - ignore_abundance: bool = False # optionally ignore abundances + ignore_abundance: bool = False # optionally ignore abundances # need these for scaled result comparisons estimate_ani_ci: bool = False ani_confidence: float = 0.95 @@ -196,18 +194,24 @@ def init_result(self): self.mh2 = self.match.minhash def build_fracminhashcomparison(self): - self.cmp = FracMinHashComparison(self.mh1, self.mh2, cmp_scaled=self.cmp_scaled, - threshold_bp=self.threshold_bp, - ignore_abundance=self.ignore_abundance, - estimate_ani_ci=self.estimate_ani_ci, - ani_confidence=self.ani_confidence) + self.cmp = FracMinHashComparison( + self.mh1, + self.mh2, + cmp_scaled=self.cmp_scaled, + threshold_bp=self.threshold_bp, + ignore_abundance=self.ignore_abundance, + estimate_ani_ci=self.estimate_ani_ci, + ani_confidence=self.ani_confidence, + ) self.cmp_scaled = self.cmp.cmp_scaled self.query_scaled = self.mh1.scaled self.match_scaled = self.mh2.scaled self.size_may_be_inaccurate = self.cmp.size_may_be_inaccurate def build_numminhashcomparison(self, cmp_num=None): - self.cmp = NumMinHashComparison(self.mh1, self.mh2, cmp_num=cmp_num, ignore_abundance=self.ignore_abundance) + self.cmp = NumMinHashComparison( + self.mh1, self.mh2, cmp_num=cmp_num, ignore_abundance=self.ignore_abundance + ) self.cmp_num = self.cmp.cmp_num self.query_num = self.mh1.num self.match_num = self.mh2.num @@ -230,7 +234,7 @@ def get_cmpinfo(self): self.filename = self.match_filename self.match_md5 = self.match.md5sum() # set these from self.match_* - self.md5= self.match_md5 + self.md5 = self.match_md5 self.name = self.match_name # could define in PrefetchResult instead, same reasoning as above self.query_abundance = self.mh1.track_abundance @@ -248,8 +252,9 @@ def shorten_md5(self, md5): def to_write(self, columns=[]): # convert comparison attrs into a dictionary # that can be used by csv dictwriter - info = {k: v for k, v in self.__dict__.items() - if k in columns and v is not None} + info = { + k: v for k, v in self.__dict__.items() if k in columns and v is not None + } return info def init_dictwriter(self, csv_handle): @@ -279,13 +284,22 @@ class SearchResult(BaseResult): """ SearchResult class supports 'sourmash search' operations. 
""" + similarity: float = None cmp_num: int = None searchtype: SearchType = None - #columns for standard SearchResult output - search_write_cols = ['similarity', 'md5', 'filename', 'name', # here we use 'filename' - 'query_filename', 'query_name', 'query_md5', 'ani'] + # columns for standard SearchResult output + search_write_cols = [ + "similarity", + "md5", + "filename", + "name", # here we use 'filename' + "query_filename", + "query_name", + "query_md5", + "ani", + ] ci_cols = ["ani_low", "ani_high"] @@ -297,10 +311,10 @@ def init_sigcomparison(self): self.build_fracminhashcomparison() elif any([self.mh1.num, self.mh2.num]): self.build_numminhashcomparison(cmp_num=self.cmp_num) - self.get_cmpinfo() # grab comparison metadata + self.get_cmpinfo() # grab comparison metadata def __post_init__(self): - self.init_sigcomparison() # build sketch comparison + self.init_sigcomparison() # build sketch comparison self.check_similarity() if self.cmp_scaled is not None and self.searchtype is not None: self.estimate_search_ani() @@ -317,11 +331,13 @@ def check_similarity(self): raise ValueError("Error: Must provide 'similarity' for SearchResult.") def estimate_search_ani(self): - #future: could estimate ANI from abund searches if we want (use query containment?) + # future: could estimate ANI from abund searches if we want (use query containment?) if self.cmp_scaled is None: raise TypeError("Error: ANI can only be estimated from scaled signatures.") if self.searchtype == SearchType.CONTAINMENT: - self.cmp.estimate_ani_from_mh1_containment_in_mh2(containment = self.similarity) + self.cmp.estimate_ani_from_mh1_containment_in_mh2( + containment=self.similarity + ) self.ani = self.cmp.ani_from_mh1_containment_in_mh2 if self.estimate_ani_ci: self.ani_low = self.cmp.ani_from_mh1_containment_in_mh2_low @@ -347,16 +363,38 @@ class PrefetchResult(BaseResult): """ # current prefetch columns - prefetch_write_cols = ['intersect_bp', 'jaccard', 'max_containment', 'f_query_match', - 'f_match_query', 'match_filename', 'match_name', # here we use 'match_filename' - 'match_md5', 'match_bp', 'query_filename', 'query_name', - 'query_md5', 'query_bp', 'ksize', 'moltype', 'scaled', - 'query_n_hashes', 'query_abundance', 'query_containment_ani', - 'match_containment_ani', 'average_containment_ani', 'max_containment_ani', - 'potential_false_negative'] #'match_abundance' - - ci_cols = ["query_containment_ani_low", "query_containment_ani_high", - "match_containment_ani_low", "match_containment_ani_high"] + prefetch_write_cols = [ + "intersect_bp", + "jaccard", + "max_containment", + "f_query_match", + "f_match_query", + "match_filename", + "match_name", # here we use 'match_filename' + "match_md5", + "match_bp", + "query_filename", + "query_name", + "query_md5", + "query_bp", + "ksize", + "moltype", + "scaled", + "query_n_hashes", + "query_abundance", + "query_containment_ani", + "match_containment_ani", + "average_containment_ani", + "max_containment_ani", + "potential_false_negative", + ] #'match_abundance' + + ci_cols = [ + "query_containment_ani_low", + "query_containment_ani_high", + "match_containment_ani_low", + "match_containment_ani_high", + ] prefetch_write_cols_ci = prefetch_write_cols + ci_cols @@ -366,8 +404,10 @@ def init_sigcomparison(self): if all([self.mh1.scaled, self.mh2.scaled]): self.build_fracminhashcomparison() else: - raise TypeError("Error: prefetch and gather results must be between scaled signatures.") - self.get_cmpinfo() # grab comparison metadata + raise TypeError( + "Error: prefetch and gather 
results must be between scaled signatures." + ) + self.get_cmpinfo() # grab comparison metadata self.intersect_bp = self.cmp.total_unique_intersect_hashes self.max_containment = self.cmp.max_containment self.query_bp = self.mh1.unique_dataset_hashes @@ -394,8 +434,12 @@ def handle_ani_ci(self): def build_prefetch_result(self): # unique prefetch values self.jaccard = self.cmp.jaccard - self.f_query_match = self.cmp.mh2_containment_in_mh1 #db_mh.contained_by(query_mh) - self.f_match_query = self.cmp.mh1_containment_in_mh2 #query_mh.contained_by(db_mh) + self.f_query_match = ( + self.cmp.mh2_containment_in_mh1 + ) # db_mh.contained_by(query_mh) + self.f_match_query = ( + self.cmp.mh1_containment_in_mh2 + ) # query_mh.contained_by(db_mh) # set write columns for prefetch result self.write_cols = self.prefetch_write_cols if self.estimate_ani_ci: @@ -433,50 +477,80 @@ class GatherResult(PrefetchResult): sum_weighted_found: int = None total_weighted_hashes: int = None - gather_write_cols = ['intersect_bp', 'f_orig_query', 'f_match', - 'f_unique_to_query', - 'f_unique_weighted','average_abund', - 'median_abund', 'std_abund', 'filename', - 'name', 'md5', - 'f_match_orig', 'unique_intersect_bp', - 'gather_result_rank', 'remaining_bp', - 'query_filename', 'query_name', 'query_md5', - 'query_bp', 'ksize', 'moltype', 'scaled', - 'query_n_hashes', 'query_abundance', - 'query_containment_ani', - 'match_containment_ani', - 'average_containment_ani', - 'max_containment_ani', - 'potential_false_negative', - 'n_unique_weighted_found', - 'sum_weighted_found', - 'total_weighted_hashes'] - - ci_cols = ["query_containment_ani_low", "query_containment_ani_high", - "match_containment_ani_low", "match_containment_ani_high"] + gather_write_cols = [ + "intersect_bp", + "f_orig_query", + "f_match", + "f_unique_to_query", + "f_unique_weighted", + "average_abund", + "median_abund", + "std_abund", + "filename", + "name", + "md5", + "f_match_orig", + "unique_intersect_bp", + "gather_result_rank", + "remaining_bp", + "query_filename", + "query_name", + "query_md5", + "query_bp", + "ksize", + "moltype", + "scaled", + "query_n_hashes", + "query_abundance", + "query_containment_ani", + "match_containment_ani", + "average_containment_ani", + "max_containment_ani", + "potential_false_negative", + "n_unique_weighted_found", + "sum_weighted_found", + "total_weighted_hashes", + ] + + ci_cols = [ + "query_containment_ani_low", + "query_containment_ani_high", + "match_containment_ani_low", + "match_containment_ani_high", + ] gather_write_cols_ci = gather_write_cols + ci_cols def init_gathersketchcomparison(self): # compare remaining gather hashes with match. Force at cmp_scaled. Force match flatten(), bc we don't need abunds. 
- self.gather_comparison = FracMinHashComparison(self.gather_querymh, self.match.minhash.flatten()) + self.gather_comparison = FracMinHashComparison( + self.gather_querymh, self.match.minhash.flatten() + ) def check_gatherresult_input(self): # check we have what we need: if self.cmp_scaled is None: - raise ValueError("Error: must provide comparison scaled value ('cmp_scaled') for GatherResult") + raise ValueError( + "Error: must provide comparison scaled value ('cmp_scaled') for GatherResult" + ) if self.gather_querymh is None: - raise ValueError("Error: must provide current gather sketch (remaining hashes) for GatherResult") + raise ValueError( + "Error: must provide current gather sketch (remaining hashes) for GatherResult" + ) if self.gather_result_rank is None: raise ValueError("Error: must provide 'gather_result_rank' to GatherResult") - if not self.total_weighted_hashes: # catch total_weighted_hashes = 0 as well - raise ValueError("Error: must provide sum of all abundances ('total_weighted_hashes') to GatherResult") + if not self.total_weighted_hashes: # catch total_weighted_hashes = 0 as well + raise ValueError( + "Error: must provide sum of all abundances ('total_weighted_hashes') to GatherResult" + ) if not self.orig_query_abunds: - raise ValueError("Error: must provide original query abundances ('orig_query_abunds') to GatherResult") + raise ValueError( + "Error: must provide original query abundances ('orig_query_abunds') to GatherResult" + ) def build_gather_result(self): # build gather-specific attributes - + # the 'query' that is passed into gather is all _matched_ hashes, after subtracting noident_mh # this affects estimation of original query information, and requires us to pass in orig_query_len and orig_query_abunds. # we also need to overwrite self.query_bp, self.query_n_hashes, and self.query_abundance @@ -486,43 +560,70 @@ def build_gather_result(self): # calculate intersection with query hashes: self.unique_intersect_bp = self.gather_comparison.total_unique_intersect_hashes - + # calculate fraction of subject match with orig query self.f_match_orig = self.cmp.mh2_containment_in_mh1 # calculate fractions wrt first denominator - genome size - self.f_match = self.gather_comparison.mh2_containment_in_mh1 # unique match containment + self.f_match = ( + self.gather_comparison.mh2_containment_in_mh1 + ) # unique match containment self.f_orig_query = len(self.cmp.intersect_mh) / self.orig_query_len - assert self.gather_comparison.intersect_mh.contained_by(self.gather_comparison.mh1_cmp) == 1.0 - + assert ( + self.gather_comparison.intersect_mh.contained_by( + self.gather_comparison.mh1_cmp + ) + == 1.0 + ) + # calculate fractions wrt second denominator - metagenome size - assert self.gather_comparison.intersect_mh.contained_by(self.gather_comparison.mh2_cmp) == 1.0 - self.f_unique_to_query = len(self.gather_comparison.intersect_mh)/self.orig_query_len + assert ( + self.gather_comparison.intersect_mh.contained_by( + self.gather_comparison.mh2_cmp + ) + == 1.0 + ) + self.f_unique_to_query = ( + len(self.gather_comparison.intersect_mh) / self.orig_query_len + ) # here, need to make sure to use the mh1_cmp (bc was downsampled to cmp_scaled) - self.remaining_bp = (self.gather_comparison.mh1_cmp.unique_dataset_hashes - self.gather_comparison.total_unique_intersect_hashes) + self.remaining_bp = ( + self.gather_comparison.mh1_cmp.unique_dataset_hashes + - self.gather_comparison.total_unique_intersect_hashes + ) # calculate stats on abundances, if desired. 
self.average_abund, self.median_abund, self.std_abund = None, None, None if not self.ignore_abundance: - self.query_weighted_unique_intersection = self.gather_comparison.weighted_intersection(from_abundD = self.orig_query_abunds) + self.query_weighted_unique_intersection = ( + self.gather_comparison.weighted_intersection( + from_abundD=self.orig_query_abunds + ) + ) self.average_abund = self.query_weighted_unique_intersection.mean_abundance self.median_abund = self.query_weighted_unique_intersection.median_abundance self.std_abund = self.query_weighted_unique_intersection.std_abundance # 'query' will be flattened by default. reset track abundance if we have abunds - self.query_abundance = self.query_weighted_unique_intersection.track_abundance + self.query_abundance = ( + self.query_weighted_unique_intersection.track_abundance + ) # calculate scores weighted by abundances - self.n_unique_weighted_found = self.query_weighted_unique_intersection.sum_abundances - self.f_unique_weighted = self.n_unique_weighted_found / self.total_weighted_hashes + self.n_unique_weighted_found = ( + self.query_weighted_unique_intersection.sum_abundances + ) + self.f_unique_weighted = ( + self.n_unique_weighted_found / self.total_weighted_hashes + ) else: self.f_unique_weighted = self.f_unique_to_query self.query_abundance = False def __post_init__(self): self.check_gatherresult_input() - self.init_sigcomparison() # initialize original sketch vs match sketch comparison (inherited from PrefetchResult) - self.init_gathersketchcomparison() # initialize remaining gather sketch vs match sketch comparison - self.build_gather_result() # build gather-specific attributes + self.init_sigcomparison() # initialize original sketch vs match sketch comparison (inherited from PrefetchResult) + self.init_gathersketchcomparison() # initialize remaining gather sketch vs match sketch comparison + self.build_gather_result() # build gather-specific attributes # set write columns for prefetch result self.write_cols = self.gather_write_cols if self.estimate_ani_ci: @@ -550,8 +651,12 @@ def prefetchresultdict(self): if self.estimate_ani_ci: prefetch_cols = self.prefetch_write_cols_ci self.jaccard = self.cmp.jaccard - self.f_query_match = self.cmp.mh2_containment_in_mh1 #db_mh.contained_by(query_mh) - self.f_match_query = self.cmp.mh1_containment_in_mh2 #query_mh.contained_by(db_mh) + self.f_query_match = ( + self.cmp.mh2_containment_in_mh1 + ) # db_mh.contained_by(query_mh) + self.f_match_query = ( + self.cmp.mh1_containment_in_mh2 + ) # query_mh.contained_by(db_mh) self.prep_prefetch_result() return self.to_write(columns=prefetch_cols) @@ -560,14 +665,14 @@ def format_bp(bp): "Pretty-print bp information." bp = float(bp) if bp < 500: - return '{:.0f} bp'.format(bp) + return f"{bp:.0f} bp" elif bp <= 500e3: - return '{:.1f} kbp'.format(round(bp / 1e3, 1)) + return f"{round(bp / 1e3, 1):.1f} kbp" elif bp < 500e6: - return '{:.1f} Mbp'.format(round(bp / 1e6, 1)) + return f"{round(bp / 1e6, 1):.1f} Mbp" elif bp < 500e9: - return '{:.1f} Gbp'.format(round(bp / 1e9, 1)) - return '???' + return f"{round(bp / 1e9, 1):.1f} Gbp" + return "???" 
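`format_bp` picks its unit at 500-of-the-previous-unit breakpoints and falls through to `"???"` for anything at or beyond 500 Gbp. A quick sketch of those thresholds:

```python
from sourmash.search import format_bp

assert format_bp(499) == "499 bp"
assert format_bp(1500) == "1.5 kbp"    # 500 bp .. 500 kbp
assert format_bp(2.5e6) == "2.5 Mbp"   # 500 kbp .. 500 Mbp
assert format_bp(3e9) == "3.0 Gbp"     # 500 Mbp .. 500 Gbp
assert format_bp(1e12) == "???"
```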
def search_databases_with_flat_query(query, databases, **kwargs): @@ -576,7 +681,7 @@ def search_databases_with_flat_query(query, databases, **kwargs): for db in databases: search_iter = db.search(query, **kwargs) - for (score, match, filename) in search_iter: + for score, match, filename in search_iter: md5 = match.md5sum() if md5 not in found_md5: results.append((score, match, filename)) @@ -589,22 +694,27 @@ def search_databases_with_flat_query(query, databases, **kwargs): # repetitive/not optimal - would it be better to produce SearchResult from db.search? estimate_ani_ci = False search_type = SearchType.JACCARD - if kwargs.get('do_containment'): + if kwargs.get("do_containment"): search_type = SearchType.CONTAINMENT - if kwargs.get('estimate_ani_ci'): + if kwargs.get("estimate_ani_ci"): estimate_ani_ci = True - elif kwargs.get('do_max_containment'): + elif kwargs.get("do_max_containment"): search_type = SearchType.MAX_CONTAINMENT - if kwargs.get('estimate_ani_ci'): + if kwargs.get("estimate_ani_ci"): estimate_ani_ci = True x = [] - for (score, match, filename) in results: - x.append(SearchResult(query, match, - similarity=score, - filename = filename, - searchtype=search_type, - estimate_ani_ci=estimate_ani_ci)) + for score, match, filename in results: + x.append( + SearchResult( + query, + match, + similarity=score, + filename=filename, + searchtype=search_type, + estimate_ani_ci=estimate_ani_ci, + ) + ) return x @@ -612,12 +722,14 @@ def search_databases_with_abund_query(query, databases, **kwargs): results = [] found_md5 = set() - if kwargs.get('do_containment') or kwargs.get('do_max_containment'): + if kwargs.get("do_containment") or kwargs.get("do_max_containment"): raise TypeError("containment searches cannot be done with abund sketches") for db in databases: - search_iter = db.search_abund(query, **kwargs) # could return SearchResult here instead of tuple? - for (score, match, filename) in search_iter: + search_iter = db.search_abund( + query, **kwargs + ) # could return SearchResult here instead of tuple? + for score, match, filename in search_iter: md5 = match.md5sum() if md5 not in found_md5: results.append((score, match, filename)) @@ -627,16 +739,16 @@ def search_databases_with_abund_query(query, databases, **kwargs): results.sort(key=lambda x: -x[0]) x = [] - for (score, match, filename) in results: - x.append(SearchResult(query, match, - similarity=score, - filename = filename)) + for score, match, filename in results: + x.append(SearchResult(query, match, similarity=score, filename=filename)) return x + ### ### gather code ### + def _find_best(counters, query, threshold_bp): """ Search for the best containment, return precisely one match. @@ -667,8 +779,17 @@ def _find_best(counters, query, threshold_bp): class GatherDatabases: "Iterator object for doing gather/min-set-cov." - def __init__(self, query, counters, *, - threshold_bp=0, ignore_abundance=False, noident_mh=None, ident_mh=None, estimate_ani_ci=False): + def __init__( + self, + query, + counters, + *, + threshold_bp=0, + ignore_abundance=False, + noident_mh=None, + ident_mh=None, + estimate_ani_ci=False, + ): # track original query information for later usage? track_abundance = query.minhash.track_abundance and not ignore_abundance self.orig_query = query @@ -683,7 +804,7 @@ def __init__(self, query, counters, *, if track_abundance: orig_query_abunds = query_hashes else: - orig_query_abunds = { k: 1 for k in query_hashes } + orig_query_abunds = {k: 1 for k in query_hashes} # adjust for not found... 
if noident_mh is None: # create empty @@ -702,7 +823,7 @@ def __init__(self, query, counters, *, query = query.to_mutable() query.minhash = orig_query_mh - cmp_scaled = query.minhash.scaled # initialize with resolution of query + cmp_scaled = query.minhash.scaled # initialize with resolution of query self.result_n = 0 self.query = query @@ -713,10 +834,12 @@ def __init__(self, query, counters, *, self.orig_query_mh = orig_query_mh self.orig_query_abunds = orig_query_abunds - self.cmp_scaled = 0 # initialize with something very low! + self.cmp_scaled = 0 # initialize with something very low! self._update_scaled(cmp_scaled) - self.estimate_ani_ci = estimate_ani_ci # by default, do not report ANI confidence intervals + self.estimate_ani_ci = ( + estimate_ani_ci # by default, do not report ANI confidence intervals + ) def _update_scaled(self, scaled): max_scaled = max(self.cmp_scaled, scaled) @@ -729,10 +852,12 @@ def _update_scaled(self, scaled): # NOTE: orig_query_abunds can be used w/o downsampling orig_query_abunds = self.orig_query_abunds - self.noident_query_sum_abunds = sum(( orig_query_abunds[k] \ - for k in self.noident_mh.hashes )) - self.total_weighted_hashes = sum(( orig_query_abunds[k] \ - for k in self.orig_query_mh.hashes )) + self.noident_query_sum_abunds = sum( + orig_query_abunds[k] for k in self.noident_mh.hashes + ) + self.total_weighted_hashes = sum( + orig_query_abunds[k] for k in self.orig_query_mh.hashes + ) self.total_weighted_hashes += self.noident_query_sum_abunds if max_scaled != scaled: @@ -753,7 +878,6 @@ def __next__(self): # may be changed: counters = self.counters - cmp_scaled = self.cmp_scaled # will not be changed:: threshold_bp = self.threshold_bp @@ -762,7 +886,7 @@ def __next__(self): # find the best match! best_result, intersect_mh = _find_best(counters, query, threshold_bp) - if not best_result: # no matches at all for this cutoff! + if not best_result: # no matches at all for this cutoff! 
            raise StopIteration

        best_match = best_result.signature
@@ -794,24 +918,26 @@ def __next__(self):

         # compute weighted information for remaining query hashes
         query_hashes = set(new_query_mh.hashes)
-        n_weighted_missed = sum((orig_query_abunds[k] for k in query_hashes))
+        n_weighted_missed = sum(orig_query_abunds[k] for k in query_hashes)
         n_weighted_missed += self.noident_query_sum_abunds
         sum_weighted_found = total_weighted_hashes - n_weighted_missed

         # build a GatherResult
-        result = GatherResult(self.orig_query, best_match,
-                              cmp_scaled=scaled,
-                              filename=filename,
-                              gather_result_rank=self.result_n,
-                              gather_querymh=query.minhash,
-                              ignore_abundance=not self.track_abundance,
-                              threshold_bp=threshold_bp,
-                              orig_query_len=orig_query_len,
-                              orig_query_abunds=self.orig_query_abunds,
-                              estimate_ani_ci=self.estimate_ani_ci,
-                              sum_weighted_found=sum_weighted_found,
-                              total_weighted_hashes=total_weighted_hashes,
-                              )
+        result = GatherResult(
+            self.orig_query,
+            best_match,
+            cmp_scaled=scaled,
+            filename=filename,
+            gather_result_rank=self.result_n,
+            gather_querymh=query.minhash,
+            ignore_abundance=not self.track_abundance,
+            threshold_bp=threshold_bp,
+            orig_query_len=orig_query_len,
+            orig_query_abunds=self.orig_query_abunds,
+            estimate_ani_ci=self.estimate_ani_ci,
+            sum_weighted_found=sum_weighted_found,
+            total_weighted_hashes=total_weighted_hashes,
+        )
         self.result_n += 1
         self.query = new_query
@@ -823,6 +949,7 @@
 ### prefetch code
 ###

+
 def prefetch_database(query, database, threshold_bp, *, estimate_ani_ci=False):
     """
     Find all matches to `query_mh` >= `threshold_bp` in `database`.
@@ -830,7 +957,14 @@ def prefetch_database(query, database, threshold_bp, *, estimate_ani_ci=False):
     scaled = query.minhash.scaled
     assert scaled
     # iterate over all signatures in database, find matches
-    for result in database.prefetch(query, threshold_bp): # future: could return PrefetchResult directly here
-        result = PrefetchResult(query, result.signature, threshold_bp=threshold_bp, estimate_ani_ci=estimate_ani_ci)
+    for result in database.prefetch(
+        query, threshold_bp
+    ):  # future: could return PrefetchResult directly here
+        result = PrefetchResult(
+            query,
+            result.signature,
+            threshold_bp=threshold_bp,
+            estimate_ani_ci=estimate_ani_ci,
+        )
         assert result.pass_threshold
         yield result
diff --git a/src/sourmash/sig/__init__.py b/src/sourmash/sig/__init__.py
index 0fafe39246..441c8fa37f 100644
--- a/src/sourmash/sig/__init__.py
+++ b/src/sourmash/sig/__init__.py
@@ -1,2 +1,2 @@
-from .__main__ import * # bring all functions into top-level
+from .__main__ import *  # bring all functions into top-level
 from . import grep
diff --git a/src/sourmash/sig/__main__.py b/src/sourmash/sig/__main__.py
index d10e8745f9..1a89d6239f 100644
--- a/src/sourmash/sig/__main__.py
+++ b/src/sourmash/sig/__main__.py
@@ -1,47 +1,55 @@
 """
 Command-line entry point for 'python -m sourmash.sig'
 """
-__all__ = ["cat",
-           "split",
-           "describe",
-           "manifest",
-           "overlap",
-           "merge",
-           "intersect",
-           "inflate",
-           "subtract",
-           "rename",
-           "extract",
-           "filter",
-           "flatten",
-           "downsample",
-           "ingest",
-           "export",
-           "kmers",
-           "fileinfo",
-           "check",
-           "collect"]
+__all__ = [
+    "cat",
+    "split",
+    "describe",
+    "manifest",
+    "overlap",
+    "merge",
+    "intersect",
+    "inflate",
+    "subtract",
+    "rename",
+    "extract",
+    "filter",
+    "flatten",
+    "downsample",
+    "ingest",
+    "export",
+    "kmers",
+    "fileinfo",
+    "check",
+    "collect",
+]

 import sys
 import csv
 import json
 import os
 from collections import defaultdict, namedtuple, Counter
-import json
 import re

 import screed
 import sourmash
 from sourmash.sourmash_args import FileOutput
-from sourmash.logging import (set_quiet, error, notify, print_results, debug,
-                              debug_literal, _debug)
+from sourmash.logging import (
+    set_quiet,
+    error,
+    notify,
+    print_results,
+    debug,
+    debug_literal,
+    _debug,
+)
 from sourmash import sourmash_args
 from sourmash.minhash import _get_max_hash_for_scaled
 from sourmash.manifest import CollectionManifest

-usage='''
+usage = """
 sourmash signature <command> [<args>] - manipulate/work with signature files.

 ** Commands can be:
@@ -67,15 +75,19 @@
 ** Use '-h' to get subcommand-specific help, e.g.

    sourmash signature merge -h
-'''
+"""


 def _check_abundance_compatibility(sig1, sig2):
     if sig1.minhash.track_abundance != sig2.minhash.track_abundance:
-        raise ValueError("incompatible signatures: track_abundance is {} in first sig, {} in second".format(sig1.minhash.track_abundance, sig2.minhash.track_abundance))
+        raise ValueError(
+            "incompatible signatures: track_abundance is {} in first sig, {} in second".format(
+                sig1.minhash.track_abundance, sig2.minhash.track_abundance
+            )
+        )


-def _extend_signatures_with_from_file(args, *, target_attr='signatures'):
+def _extend_signatures_with_from_file(args, *, target_attr="signatures"):
     # extend input signatures with --from-file
     if args.from_file:
         more_files = sourmash_args.load_pathlist_from_file(args.from_file)
@@ -109,7 +121,7 @@ def cat(args):
     picklist = sourmash_args.load_picklist(args)
     pattern_search = sourmash_args.load_include_exclude_db_patterns(args)

-    encountered_md5sums = defaultdict(int) # used by --unique
+    encountered_md5sums = defaultdict(int)  # used by --unique

     # open output for saving sigs
     save_sigs = sourmash_args.SaveSignaturesToLocation(args.output)
@@ -119,14 +131,16 @@
     # start loading!
    progress = sourmash_args.SignatureLoadingProgress()

-    loader = sourmash_args.load_many_signatures(args.signatures,
-                                                ksize=args.ksize,
-                                                moltype=moltype,
-                                                picklist=picklist,
-                                                progress=progress,
-                                                yield_all_files=args.force,
-                                                force=args.force,
-                                                pattern=pattern_search)
+    loader = sourmash_args.load_many_signatures(
+        args.signatures,
+        ksize=args.ksize,
+        moltype=moltype,
+        picklist=picklist,
+        progress=progress,
+        yield_all_files=args.force,
+        force=args.force,
+        pattern=pattern_search,
+    )
     for ss, sigloc in loader:
         md5 = ss.md5sum()
         encountered_md5sums[md5] += 1
@@ -135,19 +149,19 @@

         save_sigs.add(ss)

-    notify(f'loaded {len(save_sigs)} signatures total.')
+    notify(f"loaded {len(save_sigs)} signatures total.")

     if picklist:
         sourmash_args.report_picklist(args, picklist)

     save_sigs.close()

-    notify(f'output {len(save_sigs)} signatures')
+    notify(f"output {len(save_sigs)} signatures")

-    multiple_md5 = [ 1 for cnt in encountered_md5sums.values() if cnt > 1 ]
+    multiple_md5 = [1 for cnt in encountered_md5sums.values() if cnt > 1]
     if multiple_md5:
-        notify(f'encountered {sum(multiple_md5)} MinHashes multiple times')
+        notify(f"encountered {sum(multiple_md5)} MinHashes multiple times")
         if args.unique:
-            notify('...and removed the duplicates, because --unique was specified.')
+            notify("...and removed the duplicates, because --unique was specified.")


 def split(args):
@@ -160,50 +174,59 @@ def split(args):
     _extend_signatures_with_from_file(args)

     output_names = set()
-    output_scaled_template = '{md5sum}.k={ksize}.scaled={scaled}.{moltype}.dup={dup}.{basename}' + args.extension
-    output_num_template = '{md5sum}.k={ksize}.num={num}.{moltype}.dup={dup}.{basename}' + args.extension
+    output_scaled_template = (
+        "{md5sum}.k={ksize}.scaled={scaled}.{moltype}.dup={dup}.{basename}"
+        + args.extension
+    )
+    output_num_template = (
+        "{md5sum}.k={ksize}.num={num}.{moltype}.dup={dup}.{basename}" + args.extension
+    )

     if args.output_dir:
         if not os.path.exists(args.output_dir):
-            notify(f'Creating --output-dir {args.output_dir}')
+            notify(f"Creating --output-dir {args.output_dir}")
             os.mkdir(args.output_dir)

     progress = sourmash_args.SignatureLoadingProgress()

-    loader = sourmash_args.load_many_signatures(args.signatures,
-                                                ksize=args.ksize,
-                                                moltype=moltype,
-                                                picklist=picklist,
-                                                progress=progress,
-                                                yield_all_files=args.force,
-                                                force=args.force)
+    loader = sourmash_args.load_many_signatures(
+        args.signatures,
+        ksize=args.ksize,
+        moltype=moltype,
+        picklist=picklist,
+        progress=progress,
+        yield_all_files=args.force,
+        force=args.force,
+    )

     for sig, sigloc in loader:
         # save each file individually --
         md5sum = sig.md5sum()[:8]
         minhash = sig.minhash
         basename = os.path.basename(sig.filename)
-        if not basename or basename == '-':
-            basename = 'none'
-
-        params = dict(basename=basename,
-                      md5sum=md5sum,
-                      scaled=minhash.scaled,
-                      ksize=minhash.ksize,
-                      num=minhash.num,
-                      moltype=minhash.moltype)
+        if not basename or basename == "-":
+            basename = "none"
+
+        params = dict(
+            basename=basename,
+            md5sum=md5sum,
+            scaled=minhash.scaled,
+            ksize=minhash.ksize,
+            num=minhash.num,
+            moltype=minhash.moltype,
+        )

         if minhash.scaled:
             output_template = output_scaled_template
-        else: # num
+        else:  # num
             assert minhash.num
             output_template = output_num_template

         # figure out if this is duplicate, build unique filename
         n = 0
-        params['dup'] = n
+        params["dup"] = n
         output_name = output_template.format(**params)
         while output_name in output_names:
-            params['dup'] = n
+            params["dup"] = n
             output_name = output_template.format(**params)
             n += 1
@@ -218,9 +241,9 @@ def split(args):

         # save!
         with sourmash_args.SaveSignaturesToLocation(output_name) as save_sigs:
             save_sigs.add(sig)
-            notify(f'writing sig to {output_name}')
+            notify(f"writing sig to {output_name}")

-    notify(f'loaded and split {len(progress)} signatures total.')
+    notify(f"loaded and split {len(progress)} signatures total.")

     if picklist:
         sourmash_args.report_picklist(args, picklist)
@@ -242,24 +265,39 @@
     csv_obj = sourmash_args.FileOutputCSV(args.csv)
     csv_fp = csv_obj.open()

-    w = csv.DictWriter(csv_fp,
-                       ['signature_file', 'md5', 'ksize', 'moltype',
-                        'num', 'scaled', 'n_hashes', 'seed',
-                        'with_abundance', 'name', 'filename', 'license',
-                        'sum_hashes'],
-                       extrasaction='ignore')
+    w = csv.DictWriter(
+        csv_fp,
+        [
+            "signature_file",
+            "md5",
+            "ksize",
+            "moltype",
+            "num",
+            "scaled",
+            "n_hashes",
+            "seed",
+            "with_abundance",
+            "name",
+            "filename",
+            "license",
+            "sum_hashes",
+        ],
+        extrasaction="ignore",
+    )
     w.writeheader()

     # start loading!
     progress = sourmash_args.SignatureLoadingProgress()

-    loader = sourmash_args.load_many_signatures(args.signatures,
-                                                ksize=args.ksize,
-                                                moltype=moltype,
-                                                picklist=picklist,
-                                                progress=progress,
-                                                yield_all_files=args.force,
-                                                force=args.force,
-                                                pattern=pattern_search)
+    loader = sourmash_args.load_many_signatures(
+        args.signatures,
+        ksize=args.ksize,
+        moltype=moltype,
+        picklist=picklist,
+        progress=progress,
+        yield_all_files=args.force,
+        force=args.force,
+        pattern=pattern_search,
+    )

     for sig, location in loader:
         # extract info, write as appropriate.
@@ -285,7 +323,8 @@ def describe(args):
         if w:
             w.writerow(locals())

-        print_results('''\
+        print_results(
+            """\
 ---
 signature filename: {location}
 signature: {p_name}
@@ -295,7 +334,9 @@
 size: {n_hashes}
 sum hashes: {sum_hashes}
 signature license: {license}
-''', **locals())
+""",
+            **locals(),
+        )

     if csv_obj:
         csv_obj.close()
@@ -311,9 +352,10 @@ def manifest(args):
     set_quiet(args.quiet, args.debug)

     try:
-        loader = sourmash_args.load_file_as_index(args.location,
-                                                  yield_all_files=args.force)
-    except ValueError as exc:
+        loader = sourmash_args.load_file_as_index(
+            args.location, yield_all_files=args.force
+        )
+    except ValueError:
         error(f"Cannot open '{args.location}' as a sourmash signature collection.")
         error("Use -d/--debug for details.")
         sys.exit(-1)
@@ -325,12 +367,11 @@
     else:
         debug("sig manifest: forcing rebuild.")

-    manifest = sourmash_args.get_manifest(loader, require=True,
-                                          rebuild=rebuild)
+    manifest = sourmash_args.get_manifest(loader, require=True, rebuild=rebuild)

-    manifest.write_to_filename(args.output,
-                               database_format=args.manifest_format,
-                               ok_if_exists=args.force)
+    manifest.write_to_filename(
+        args.output, database_format=args.manifest_format, ok_if_exists=args.force
+    )
     notify(f"manifest contains {len(manifest)} signatures total.")
     notify(f"wrote manifest to '{args.output}' ({args.manifest_format})")

@@ -343,12 +384,14 @@ def overlap(args):

     moltype = sourmash_args.calculate_moltype(args)

-    sig1 = sourmash.load_one_signature(args.signature1, ksize=args.ksize,
-                                       select_moltype=moltype)
-    sig2 = sourmash.load_one_signature(args.signature2, ksize=args.ksize,
-                                       select_moltype=moltype)
+    sig1 = sourmash.load_one_signature(
+        args.signature1, ksize=args.ksize, select_moltype=moltype
+    )
+    sig2 = sourmash.load_one_signature(
+        args.signature2, ksize=args.ksize, select_moltype=moltype
+    )

-    notify(f'loaded one signature each from {args.signature1} and {args.signature2}')
+    notify(f"loaded one signature each from {args.signature1} and {args.signature2}")

     try:
         similarity = sig1.similarity(sig2)
@@ -384,7 +427,8 @@ def overlap(args):
     disjoint_2 = len(hashes_2 - hashes_1)
     num_union = len(hashes_1.union(hashes_2))

-    print('''\
+    print(
+        """\
 first signature:
   signature filename: {sig1_file}
   signature: {name1}
@@ -408,7 +452,8 @@
   only in first: {disjoint_1}
   only in second: {disjoint_2}
   total (union): {num_union}
-'''.format(**locals()))
+""".format(**locals())
+    )


 def merge(args):
@@ -425,13 +470,15 @@ def merge(args):

     # start loading!
     progress = sourmash_args.SignatureLoadingProgress()

-    loader = sourmash_args.load_many_signatures(args.signatures,
-                                                ksize=args.ksize,
-                                                moltype=moltype,
-                                                picklist=picklist,
-                                                progress=progress,
-                                                yield_all_files=args.force,
-                                                force=args.force)
+    loader = sourmash_args.load_many_signatures(
+        args.signatures,
+        ksize=args.ksize,
+        moltype=moltype,
+        picklist=picklist,
+        progress=progress,
+        yield_all_files=args.force,
+        force=args.force,
+    )

     for sigobj, sigloc in loader:
         # first signature? initialize a bunch of stuff
@@ -452,8 +499,12 @@

                 mh.merge(sigobj_mh)
             except (TypeError, ValueError) as exc:
-                error("ERROR when merging signature '{}' ({}) from file {}",
-                      sigobj, sigobj.md5sum()[:8], sigloc)
+                error(
+                    "ERROR when merging signature '{}' ({}) from file {}",
+                    sigobj,
+                    sigobj.md5sum()[:8],
+                    sigloc,
+                )
                 error(str(exc))
                 sys.exit(-1)
@@ -466,7 +517,7 @@
     with sourmash_args.SaveSignaturesToLocation(args.output) as save_sigs:
         save_sigs.add(merged_sigobj)

-    notify(f'loaded and merged {len(progress)} signatures')
+    notify(f"loaded and merged {len(progress)} signatures")

     if picklist:
         sourmash_args.report_picklist(args, picklist)
@@ -488,13 +539,15 @@

     # start loading!
     progress = sourmash_args.SignatureLoadingProgress()

-    loader = sourmash_args.load_many_signatures(args.signatures,
-                                                ksize=args.ksize,
-                                                moltype=moltype,
-                                                picklist=picklist,
-                                                progress=progress,
-                                                yield_all_files=args.force,
-                                                force=args.force)
+    loader = sourmash_args.load_many_signatures(
+        args.signatures,
+        ksize=args.ksize,
+        moltype=moltype,
+        picklist=picklist,
+        progress=progress,
+        yield_all_files=args.force,
+        force=args.force,
+    )

     for sigobj, sigloc in loader:
         if first_sig is None:
@@ -519,10 +572,10 @@

     # borrow abundances from a signature?
     if args.abundances_from:
-        notify(f'loading signature from {args.abundances_from}, keeping abundances')
-        abund_sig = sourmash.load_one_signature(args.abundances_from,
-                                                ksize=args.ksize,
-                                                select_moltype=moltype)
+        notify(f"loading signature from {args.abundances_from}, keeping abundances")
+        abund_sig = sourmash.load_one_signature(
+            args.abundances_from, ksize=args.ksize, select_moltype=moltype
+        )
         if not abund_sig.minhash.track_abundance:
             error("--track-abundance not set on loaded signature?! exiting.")
             sys.exit(-1)
@@ -533,7 +586,7 @@
     with sourmash_args.SaveSignaturesToLocation(args.output) as save_sigs:
         save_sigs.add(intersect_sigobj)

-    notify(f'loaded and intersected {len(progress)} signatures')
+    notify(f"loaded and intersected {len(progress)} signatures")

     if picklist:
         sourmash_args.report_picklist(args, picklist)
@@ -546,9 +599,9 @@ def inflate(args):
     moltype = sourmash_args.calculate_moltype(args)
     picklist = sourmash_args.load_picklist(args)

-    inflate_sig = sourmash_args.load_query_signature(args.signature_from,
-                                                     ksize=args.ksize,
-                                                     select_moltype=moltype)
+    inflate_sig = sourmash_args.load_query_signature(
+        args.signature_from, ksize=args.ksize, select_moltype=moltype
+    )
     inflate_from_mh = inflate_sig.minhash
     ksize = inflate_from_mh.ksize
     moltype = inflate_from_mh.moltype
@@ -560,19 +613,20 @@
     # start loading!
     progress = sourmash_args.SignatureLoadingProgress()

-    loader = sourmash_args.load_many_signatures(args.other_sigs,
-                                                ksize=ksize,
-                                                moltype=moltype,
-                                                picklist=picklist,
-                                                progress=progress,
-                                                yield_all_files=args.force,
-                                                force=args.force)
+    loader = sourmash_args.load_many_signatures(
+        args.other_sigs,
+        ksize=ksize,
+        moltype=moltype,
+        picklist=picklist,
+        progress=progress,
+        yield_all_files=args.force,
+        force=args.force,
+    )

     with sourmash_args.SaveSignaturesToLocation(args.output) as save_sigs:
         for sigobj, sigloc in loader:
             inflated_mh = sigobj.minhash.inflate(inflate_from_mh)
-            inflated_sigobj = sourmash.SourmashSignature(inflated_mh,
-                                                         name=sigobj.name)
+            inflated_sigobj = sourmash.SourmashSignature(inflated_mh, name=sigobj.name)

             save_sigs.add(inflated_sigobj)

@@ -580,7 +634,7 @@
         error("no signatures to inflate!?")
         sys.exit(-1)

-    notify(f'loaded and intersected {len(save_sigs)} signatures')
+    notify(f"loaded and intersected {len(save_sigs)} signatures")

     if picklist:
         sourmash_args.report_picklist(args, picklist)
@@ -593,38 +647,41 @@
     moltype = sourmash_args.calculate_moltype(args)

     from_sigfile = args.signature_from
-    from_sigobj = sourmash.load_one_signature(from_sigfile, ksize=args.ksize, select_moltype=moltype)
+    from_sigobj = sourmash.load_one_signature(
+        from_sigfile, ksize=args.ksize, select_moltype=moltype
+    )

-    if args.abundances_from: # it's ok to work with abund signatures if -A.
+    if args.abundances_from:  # it's ok to work with abund signatures if -A.
         args.flatten = True

     from_mh = from_sigobj.minhash
     if from_mh.track_abundance and not args.flatten:
-        error('Cannot use subtract on signatures with abundance tracking, sorry!')
+        error("Cannot use subtract on signatures with abundance tracking, sorry!")
         sys.exit(1)

     subtract_mins = set(from_mh.hashes)

-    notify(f'loaded signature from {from_sigfile}...', end='\r')
+    notify(f"loaded signature from {from_sigfile}...", end="\r")

     progress = sourmash_args.SignatureLoadingProgress()

     for sigfile in args.subtraction_sigs:
-        for sigobj in sourmash_args.load_file_as_signatures(sigfile,
-                                                            ksize=args.ksize,
-                                                            select_moltype=moltype,
-                                                            progress=progress):
+        for sigobj in sourmash_args.load_file_as_signatures(
+            sigfile, ksize=args.ksize, select_moltype=moltype, progress=progress
+        ):
             if not sigobj.minhash.is_compatible(from_mh):
                 error("incompatible minhashes; specify -k and/or molecule type.")
                 sys.exit(-1)

             if sigobj.minhash.track_abundance and not args.flatten:
-                error('Cannot use subtract on signatures with abundance tracking, sorry!')
+                error(
+                    "Cannot use subtract on signatures with abundance tracking, sorry!"
+ ) sys.exit(1) subtract_mins -= set(sigobj.minhash.hashes) - notify(f'loaded and subtracted signatures from {sigfile}...', end='\r') + notify(f"loaded and subtracted signatures from {sigfile}...", end="\r") if not len(progress): error("no signatures to subtract!?") @@ -636,10 +693,10 @@ def subtract(args): # borrow abundances from somewhere? if args.abundances_from: - notify(f'loading signature from {args.abundances_from}, keeping abundances') - abund_sig = sourmash.load_one_signature(args.abundances_from, - ksize=args.ksize, - select_moltype=moltype) + notify(f"loading signature from {args.abundances_from}, keeping abundances") + abund_sig = sourmash.load_one_signature( + args.abundances_from, ksize=args.ksize, select_moltype=moltype + ) if not abund_sig.minhash.track_abundance: error("--track-abundance not set on loaded signature?! exiting.") sys.exit(-1) @@ -651,7 +708,7 @@ def subtract(args): with sourmash_args.SaveSignaturesToLocation(args.output) as save_sigs: save_sigs.add(subtract_sigobj) - notify(f'loaded and subtracted {len(progress)} signatures') + notify(f"loaded and subtracted {len(progress)} signatures") def rename(args): @@ -669,14 +726,16 @@ def rename(args): # start loading! progress = sourmash_args.SignatureLoadingProgress() - loader = sourmash_args.load_many_signatures(args.signatures, - ksize=args.ksize, - moltype=moltype, - picklist=picklist, - progress=progress, - yield_all_files=args.force, - force=args.force, - pattern=pattern_search) + loader = sourmash_args.load_many_signatures( + args.signatures, + ksize=args.ksize, + moltype=moltype, + picklist=picklist, + progress=progress, + yield_all_files=args.force, + force=args.force, + pattern=pattern_search, + ) for sigobj, sigloc in loader: sigobj = sigobj.to_mutable() @@ -703,14 +762,15 @@ def extract(args): # further filtering on md5 or name? filter_fn = None if args.md5 is not None or args.name is not None: + def filter_fn(row): # match? keep = False if args.name: - name = row['name'] or row['filename'] + name = row["name"] or row["filename"] if args.name in name: keep = True - if args.md5 and args.md5 in row['md5']: + if args.md5 and args.md5 in row["md5"]: keep = True return keep @@ -722,13 +782,11 @@ def filter_fn(row): # start loading! total_rows_examined = 0 for filename in args.signatures: - idx = sourmash_args.load_file_as_index(filename, - yield_all_files=args.force) + idx = sourmash_args.load_file_as_index(filename, yield_all_files=args.force) idx = idx.select(ksize=args.ksize, moltype=moltype) - idx = sourmash_args.apply_picklist_and_pattern(idx, picklist, - pattern_search) + idx = sourmash_args.apply_picklist_and_pattern(idx, picklist, pattern_search) manifest = sourmash_args.get_manifest(idx) total_rows_examined += len(manifest) @@ -743,7 +801,9 @@ def filter_fn(row): try: idx = idx.select(picklist=sub_picklist) except ValueError: - error("** This input collection doesn't support 'extract' with picklists or patterns.") + error( + "** This input collection doesn't support 'extract' with picklists or patterns." + ) error("** EXITING.") error("**") error("** You can use 'sourmash sig cat' with a picklist or pattern,") @@ -779,31 +839,29 @@ def filter(args): save_sigs.open() for filename in args.signatures: - siglist = sourmash_args.load_file_as_signatures(filename, - ksize=args.ksize, - select_moltype=moltype, - progress=progress) + siglist = sourmash_args.load_file_as_signatures( + filename, ksize=args.ksize, select_moltype=moltype, progress=progress + ) siglist = list(siglist) # select! 
if args.md5 is not None: - siglist = [ ss for ss in siglist if args.md5 in ss.md5sum() ] + siglist = [ss for ss in siglist if args.md5 in ss.md5sum()] if args.name is not None: - siglist = [ ss for ss in siglist if args.name in str(ss) ] + siglist = [ss for ss in siglist if args.name in str(ss)] for ss in siglist: mh = ss.minhash if not mh.track_abundance: - notify(f'ignoring signature {ss} - track_abundance not set.') + notify(f"ignoring signature {ss} - track_abundance not set.") continue abunds = mh.hashes abunds2 = {} for k, v in abunds.items(): if v >= args.min_abundance: - if args.max_abundance is None or \ - v <= args.max_abundance: - abunds2[k] = v + if args.max_abundance is None or v <= args.max_abundance: + abunds2[k] = v filtered_mh = mh.copy_and_clear() filtered_mh.set_abundances(abunds2) @@ -833,22 +891,24 @@ def flatten(args): # start loading! progress = sourmash_args.SignatureLoadingProgress() - loader = sourmash_args.load_many_signatures(args.signatures, - ksize=args.ksize, - moltype=moltype, - picklist=picklist, - progress=progress, - yield_all_files=args.force, - force=args.force) + loader = sourmash_args.load_many_signatures( + args.signatures, + ksize=args.ksize, + moltype=moltype, + picklist=picklist, + progress=progress, + yield_all_files=args.force, + force=args.force, + ) for ss, sigloc in loader: # select! if args.md5 is not None: if args.md5 not in ss.md5sum(): - continue # skip + continue # skip if args.name is not None: if args.name not in ss.name: - continue # skip + continue # skip ss = ss.to_mutable() ss.minhash = ss.minhash.flatten() @@ -872,11 +932,11 @@ def downsample(args): _extend_signatures_with_from_file(args) if not args.num_hashes and not args.scaled: - error('ERROR: must specify either --num or --scaled value') + error("ERROR: must specify either --num or --scaled value") sys.exit(-1) if args.num_hashes and args.scaled: - error('ERROR: cannot specify both --num and --scaled') + error("ERROR: cannot specify both --num and --scaled") sys.exit(-1) # open output for saving sigs @@ -885,13 +945,15 @@ def downsample(args): # start loading! progress = sourmash_args.SignatureLoadingProgress() - loader = sourmash_args.load_many_signatures(args.signatures, - ksize=args.ksize, - moltype=moltype, - picklist=picklist, - progress=progress, - yield_all_files=args.force, - force=args.force) + loader = sourmash_args.load_many_signatures( + args.signatures, + ksize=args.ksize, + moltype=moltype, + picklist=picklist, + progress=progress, + yield_all_files=args.force, + force=args.force, + ) for ss, sigloc in loader: sigobj = ss.to_mutable() mh = sigobj.minhash @@ -906,7 +968,9 @@ def downsample(args): max_hash = _get_max_hash_for_scaled(args.scaled) mins = mh.hashes if max(mins) < max_hash: - raise ValueError("this num MinHash does not have enough hashes to convert it into a scaled MinHash.") + raise ValueError( + "this num MinHash does not have enough hashes to convert it into a scaled MinHash." 
+                )

                 mh_new = mh.copy()
                 _set_num_scaled(mh_new, 0, args.scaled)
@@ -923,7 +987,6 @@ def downsample(args):
                 mh_new = mh.copy()
                 _set_num_scaled(mh_new, args.num_hashes, 0)

-
         sigobj.minhash = mh_new
         save_sigs.add(sigobj)
@@ -944,7 +1007,7 @@ def ingest(args):
     siglist = []
     if args.csv:
         for filename in args.filenames:
-            with open(filename, newline='') as csv_fp:
+            with open(filename, newline="") as csv_fp:
                 reader = csv.reader(csv_fp)
                 siglist = []
                 for row in reader:
@@ -952,34 +1015,34 @@
                     hashseed = int(row[1])

                     # only support a limited import type, for now ;)
-                    assert hashfn == 'murmur64'
+                    assert hashfn == "murmur64"
                     assert hashseed == 42

                     _, _, ksize, name, hashes = row
                     ksize = int(ksize)

                     hashes = hashes.strip()
-                    hashes = list(map(int, hashes.split(' ' )))
+                    hashes = list(map(int, hashes.split(" ")))

                     e = sourmash.MinHash(len(hashes), ksize)
                     e.add_many(hashes)

                     s = sourmash.SourmashSignature(e, filename=name)
                     siglist.append(s)
-                    notify(f'loaded signature: {name} {s.md5sum()[:8]}')
+                    notify(f"loaded signature: {name} {s.md5sum()[:8]}")
     else:
         for filename in args.filenames:
             with open(filename) as fp:
                 x = json.loads(fp.read())

-            ksize = x['kmer']
-            num = x['sketchSize']
+            ksize = x["kmer"]
+            num = x["sketchSize"]

-            assert x['hashType'] == "MurmurHash3_x64_128"
-            assert x['hashBits'] == 64
-            assert x['hashSeed'] == 42
+            assert x["hashType"] == "MurmurHash3_x64_128"
+            assert x["hashBits"] == 64
+            assert x["hashSeed"] == 42

-            xx = x['sketches'][0]
-            hashes = xx['hashes']
+            xx = x["sketches"][0]
+            hashes = xx["hashes"]

             mh = sourmash.MinHash(ksize=ksize, n=num, is_protein=False)
             mh.add_many(hashes)
@@ -987,7 +1050,7 @@
             s = sourmash.SourmashSignature(mh, filename=filename)
             siglist.append(s)

-    notify(f'saving {len(siglist)} signatures to JSON')
+    notify(f"saving {len(siglist)} signatures to JSON")
     with sourmash_args.SaveSignaturesToLocation(args.output) as save_sigs:
         save_sigs.add_many(siglist)
@@ -999,24 +1062,23 @@ def export(args):
     set_quiet(args.quiet)
     moltype = sourmash_args.calculate_moltype(args)

-    query = sourmash_args.load_query_signature(args.filename,
-                                               ksize=args.ksize,
-                                               select_moltype=moltype,
-                                               select_md5=args.md5)
+    query = sourmash_args.load_query_signature(
+        args.filename, ksize=args.ksize, select_moltype=moltype, select_md5=args.md5
+    )

     mh = query.minhash
     x = {}
-    x['kmer'] = mh.ksize
-    x['sketchSize'] = len(mh)
+    x["kmer"] = mh.ksize
+    x["sketchSize"] = len(mh)

-    x['hashType'] = "MurmurHash3_x64_128"
-    x['hashBits'] = 64
-    x['hashSeed'] = mh.seed
+    x["hashType"] = "MurmurHash3_x64_128"
+    x["hashBits"] = 64
+    x["hashSeed"] = mh.seed

     ll = list(mh.hashes)
-    x['sketches'] = [{ 'hashes': ll }]
+    x["sketches"] = [{"hashes": ll}]

-    with FileOutput(args.output, 'wt') as fp:
+    with FileOutput(args.output, "wt") as fp:
         print(json.dumps(x), file=fp)

     notify(f"exported signature {query} ({query.md5sum()[:8]})")
@@ -1035,16 +1097,17 @@ def kmers(args):
     first_sig = None
     query_mh = None

-
     # start loading!
     progress = sourmash_args.SignatureLoadingProgress()

-    loader = sourmash_args.load_many_signatures(args.signatures,
-                                                ksize=args.ksize,
-                                                moltype=moltype,
-                                                picklist=picklist,
-                                                progress=progress,
-                                                yield_all_files=args.force,
-                                                force=args.force)
+    loader = sourmash_args.load_many_signatures(
+        args.signatures,
+        ksize=args.ksize,
+        moltype=moltype,
+        picklist=picklist,
+        progress=progress,
+        yield_all_files=args.force,
+        force=args.force,
+    )

     for sigobj, sigloc in loader:
         # first signature? initialize a bunch of stuff
@@ -1061,8 +1124,12 @@ def kmers(args):

             query_mh.merge(sigobj_mh)
         except (TypeError, ValueError) as exc:
-            error("ERROR when merging signature '{}' ({}) from file {}",
-                  sigobj, sigobj.md5sum()[:8], sigloc)
+            error(
+                "ERROR when merging signature '{}' ({}) from file {}",
+                sigobj,
+                sigobj.md5sum()[:8],
+                sigloc,
+            )
             error(str(exc))
             sys.exit(-1)
@@ -1075,13 +1142,13 @@
         sourmash_args.report_picklist(args, picklist)

     is_protein = False
-    if query_mh.moltype == 'DNA':
+    if query_mh.moltype == "DNA":
         if args.translate:
             error("ERROR: cannot use --translate with DNA sketches.")
             sys.exit(-1)
     else:
         is_protein = True
-        if args.translate: # input sequence is DNA
+        if args.translate:  # input sequence is DNA
             is_protein = False

     if not query_mh:
@@ -1089,8 +1156,10 @@
         sys.exit(-1)

     notify("")
-    notify(f"merged signature has the following properties:")
-    notify(f"k={query_mh.ksize} molecule={query_mh.moltype} num={query_mh.num} scaled={query_mh.scaled} seed={query_mh.seed}")
+    notify("merged signature has the following properties:")
+    notify(
+        f"k={query_mh.ksize} molecule={query_mh.moltype} num={query_mh.num} scaled={query_mh.scaled} seed={query_mh.seed}"
+    )
     notify(f"total hashes in merged signature: {len(query_mh)}")
     notify("")
     notify("now processing sequence files for matches!")
@@ -1103,11 +1172,10 @@
     if args.save_kmers:
         save_kmers = sourmash_args.FileOutputCSV(args.save_kmers)
         save_kmers.open()
-        kmer_w = csv.DictWriter(save_kmers.fp,
-                                fieldnames=['sequence_file',
-                                            'sequence_name',
-                                            'kmer',
-                                            'hashval'])
+        kmer_w = csv.DictWriter(
+            save_kmers.fp,
+            fieldnames=["sequence_file", "sequence_name", "kmer", "hashval"],
+        )
         kmer_w.writeheader()

     save_seqs = None
@@ -1117,7 +1185,7 @@

     # figure out protein vs dna
     is_protein = False
-    if query_mh.moltype != 'DNA':
+    if query_mh.moltype != "DNA":
         if not args.translate:
             is_protein = True

@@ -1143,12 +1211,11 @@
                     seq_mh.add_protein(record.sequence)
                 else:
                     try:
-                        seq_mh.add_sequence(record.sequence,
-                                            not args.check_sequence)
+                        seq_mh.add_sequence(record.sequence, not args.check_sequence)
                     except ValueError as exc:
                         seqname = record.name
                         if len(seqname) > 40:
-                            seqname = seqname[:37] + '...'
+                            seqname = seqname[:37] + "..."
                         notify(f"ERROR in sequence '{seqname}', file '{filename}'")
                         notify(str(exc))
                         if args.force:
@@ -1169,15 +1236,19 @@
                 # output matching k-mers:
                 if kmer_w:
                     seq = record.sequence
-                    kh_iter = seq_mh.kmers_and_hashes(seq, force=False,
-                                                      is_protein=is_protein)
+                    kh_iter = seq_mh.kmers_and_hashes(
+                        seq, force=False, is_protein=is_protein
+                    )
                     for kmer, hashval in kh_iter:
                         if hashval in query_mh.hashes:
                             found_mh.add_hash(hashval)
                             n_kmers_found += 1
-                            d = dict(sequence_file=filename,
-                                     sequence_name=record.name,
-                                     kmer=kmer, hashval=hashval)
+                            d = dict(
+                                sequence_file=filename,
+                                sequence_name=record.name,
+                                kmer=kmer,
+                                hashval=hashval,
+                            )
                             kmer_w.writerow(d)

                 # add seq_mh to found_mh
@@ -1188,7 +1259,9 @@
             n_bp_searched += len(record.sequence)

             if n_bp_searched >= progress_threshold:
-                notify(f"... searched {n_bp_searched} from {n_files_searched} files so far")
+                notify(
+                    f"... searched {n_bp_searched} from {n_files_searched} files so far"
+                )
                 while n_bp_searched >= progress_threshold:
                     progress_threshold += progress_interval
@@ -1205,10 +1278,14 @@

     # ...and report!
notify("DONE.") - notify(f"searched {n_sequences_searched} sequences from {n_files_searched} files, containing a total of {format_bp(n_bp_searched)}.") + notify( + f"searched {n_sequences_searched} sequences from {n_files_searched} files, containing a total of {format_bp(n_bp_searched)}." + ) if save_seqs: - notify(f"matched and saved a total of {n_sequences_found} sequences with {format_bp(n_bp_saved)}.") + notify( + f"matched and saved a total of {n_sequences_found} sequences with {format_bp(n_bp_saved)}." + ) if kmer_w: notify(f"matched and saved a total of {n_kmers_found} k-mers.") @@ -1226,7 +1303,7 @@ def kmers(args): notify("NOTE: see --save-kmers or --save-sequences for output options.") -_SketchInfo = namedtuple('_SketchInfo', 'ksize, moltype, scaled, num, abund') +_SketchInfo = namedtuple("_SketchInfo", "ksize, moltype, scaled, num, abund") def _summarize_manifest(manifest): @@ -1237,22 +1314,26 @@ def _summarize_manifest(manifest): counter = Counter() hashcounts = Counter() for row in manifest.rows: - ski = _SketchInfo(ksize=row['ksize'], moltype=row['moltype'], - scaled=row['scaled'], num=row['num'], - abund=row['with_abundance']) + ski = _SketchInfo( + ksize=row["ksize"], + moltype=row["moltype"], + scaled=row["scaled"], + num=row["num"], + abund=row["with_abundance"], + ) counter[ski] += 1 - hashcounts[ski] += row['n_hashes'] - total_size += row['n_hashes'] + hashcounts[ski] += row["n_hashes"] + total_size += row["n_hashes"] # store in info_d - info_d['total_hashes'] = total_size + info_d["total_hashes"] = total_size sketch_info = [] for ski, count in counter.items(): sketch_d = dict(ski._asdict()) - sketch_d['count'] = count - sketch_d['n_hashes'] = hashcounts[ski] + sketch_d["count"] = count + sketch_d["n_hashes"] = hashcounts[ski] sketch_info.append(sketch_d) - info_d['sketch_info'] = sketch_info + info_d["sketch_info"] = sketch_info return info_d @@ -1271,22 +1352,21 @@ def fileinfo(args): # load as index! 
try: notify(f"** loading from '{args.path}'") - idx = sourmash_args.load_file_as_index(args.path, - yield_all_files=args.force) + idx = sourmash_args.load_file_as_index(args.path, yield_all_files=args.force) except ValueError: error(f"Cannot open '{args.path}' as a sourmash signature collection.") error("Use -d/--debug for details.") sys.exit(-1) - print_bool = lambda x: "yes" if x else "no" - print_none = lambda x: "n/a" if x is None else x + def print_bool(x): + return "yes" if x else "no" info_d = {} - info_d['path_filetype'] = type(idx).__name__ - info_d['location'] = "" if not idx.location else idx.location - info_d['is_database'] = bool(idx.is_database) - info_d['has_manifest'] = bool(idx.manifest) - info_d['num_sketches'] = len(idx) + info_d["path_filetype"] = type(idx).__name__ + info_d["location"] = "" if not idx.location else idx.location + info_d["is_database"] = bool(idx.is_database) + info_d["has_manifest"] = bool(idx.manifest) + info_d["num_sketches"] = len(idx) if text_out: print_results(f"path filetype: {info_d['path_filetype']}") @@ -1298,8 +1378,9 @@ def fileinfo(args): # also have arg to fileinfo to force recalculation notify("** examining manifest...") - manifest = sourmash_args.get_manifest(idx, rebuild=args.rebuild_manifest, - require=False) + manifest = sourmash_args.get_manifest( + idx, rebuild=args.rebuild_manifest, require=False + ) if manifest is None: # actually can't find any file type to trigger this, but leaving it @@ -1313,9 +1394,9 @@ def fileinfo(args): print_results(f"total hashes: {info_d['total_hashes']}") print_results("summary of sketches:") - for ski in info_d['sketch_info']: - mh_type = f"num={ski['num']}" if ski['num'] else f"scaled={ski['scaled']}" - mh_abund = ", abund" if ski['abund'] else "" + for ski in info_d["sketch_info"]: + mh_type = f"num={ski['num']}" if ski["num"] else f"scaled={ski['scaled']}" + mh_abund = ", abund" if ski["abund"] else "" sketch_str = f"{ski['count']} sketches with {ski['moltype']}, k={ski['ksize']}, {mh_type}{mh_abund}" @@ -1331,10 +1412,11 @@ def check(args): check signature db(s) against a picklist. """ from sourmash.picklist import PickStyle + set_quiet(args.quiet, args.debug) moltype = sourmash_args.calculate_moltype(args) picklist = sourmash_args.load_picklist(args) - pattern_search = sourmash_args.load_include_exclude_db_patterns(args) + sourmash_args.load_include_exclude_db_patterns(args) _extend_signatures_with_from_file(args) if not picklist: @@ -1358,8 +1440,7 @@ def check(args): # start loading! total_rows_examined = 0 for filename in args.signatures: - idx = sourmash_args.load_file_as_index(filename, - yield_all_files=args.force) + idx = sourmash_args.load_file_as_index(filename, yield_all_files=args.force) idx = idx.select(ksize=args.ksize, moltype=moltype) @@ -1376,14 +1457,15 @@ def check(args): # rewrite locations so that each signature can be found by filename # of its container; this follows `sig collect` logic. 
-        rows = []
         for row in sub_manifest.rows:
-            row['internal_location'] = filename
+            row["internal_location"] = filename
             total_manifest_rows.add_row(row)

         # the len(sub_manifest) here should only be run when needed :)
         if _debug:
-            debug_literal(f"examined {len(new_manifest)} new rows, found {len(sub_manifest)} matching rows")
+            debug_literal(
+                f"examined {len(new_manifest)} new rows, found {len(sub_manifest)} matching rows"
+            )

     notify(f"loaded {total_rows_examined} signatures.")
@@ -1399,7 +1481,7 @@

         n_output = 0
         with sourmash_args.FileInputCSV(pickfile) as r:
-            with open(args.output_missing, "w", newline='') as outfp:
+            with open(args.output_missing, "w", newline="") as outfp:
                 w = csv.DictWriter(outfp, fieldnames=r.fieldnames)
                 w.writeheader()
@@ -1408,18 +1490,27 @@
                     if not picklist.matched_csv_row(row):
                         n_output += 1
                         w.writerow(row)
-        notify(f"saved {n_output} non-matching rows of {n_input} picklist rows to '{args.output_missing}'")
+        notify(
+            f"saved {n_output} non-matching rows of {n_input} picklist rows to '{args.output_missing}'"
+        )
     elif args.output_missing:
-        notify(f"(no remaining picklist entries; not saving to '{args.output_missing}')")
+        notify(
+            f"(no remaining picklist entries; not saving to '{args.output_missing}')"
+        )

     # save manifest of matching!
     if args.save_manifest_matching and total_manifest_rows:
         mf = total_manifest_rows
-        mf.write_to_filename(args.save_manifest_matching,
-                             database_format=args.manifest_format)
-        notify(f"wrote {len(mf)} matching manifest rows to '{args.save_manifest_matching}'")
+        mf.write_to_filename(
+            args.save_manifest_matching, database_format=args.manifest_format
+        )
+        notify(
+            f"wrote {len(mf)} matching manifest rows to '{args.save_manifest_matching}'"
+        )
     elif args.save_manifest_matching:
-        notify(f"(not saving matching manifest to '{args.save_manifest_matching}' because no matches)")
+        notify(
+            f"(not saving matching manifest to '{args.save_manifest_matching}' because no matches)"
+        )

     if args.fail_if_missing and n_missing:
         error("** ERROR: missing values, and --fail-if-missing requested. Exiting.")
@@ -1437,15 +1528,17 @@ def collect(args):
             pass
         else:
             error(f"ERROR: '{args.output}' already exists!")
-            error(f"ERROR: please remove it, or use --merge-previous to merge")
+            error("ERROR: please remove it, or use --merge-previous to merge")
             sys.exit(-1)
     elif args.merge_previous:
-        notify(f"WARNING: --merge-previous specified, but output file '{args.output}' does not already exist?")
+        notify(
+            f"WARNING: --merge-previous specified, but output file '{args.output}' does not already exist?"
+        )

     # load previous manifest for --merge-previous. This gets tricky with
     # mismatched manifest types, which we forbid.
     try:
-        if args.manifest_format == 'sql':
+        if args.manifest_format == "sql":
             # create on-disk manifest
             from sourmash.index.sqlite_index import SqliteCollectionManifest

@@ -1455,7 +1548,7 @@
             collected_mf = SqliteCollectionManifest.create(args.output)
         else:
             # create in-memory manifest that will be saved as CSV
-            assert args.manifest_format == 'csv'
+            assert args.manifest_format == "csv"

             if args.merge_previous and os.path.exists(args.output):
                 collected_mf = CollectionManifest.load_from_filename(args.output)
@@ -1465,7 +1558,9 @@
             if not isinstance(collected_mf, CollectionManifest):
                 raise Exception
     except:
-        error(f"ERROR loading '{args.output}' with --merge-previous. Is it of type {args.manifest_format}?")
+        error(
+            f"ERROR loading '{args.output}' with --merge-previous. Is it of type {args.manifest_format}?"
+        )
         sys.exit(-1)

     if args.merge_previous:
@@ -1482,35 +1577,37 @@
     n_files = 0

     # load from_file
-    _extend_signatures_with_from_file(args, target_attr='locations')
+    _extend_signatures_with_from_file(args, target_attr="locations")

     # convert to abspath
     if args.abspath:
-        args.locations = [ os.path.abspath(iloc) for iloc in args.locations ]
+        args.locations = [os.path.abspath(iloc) for iloc in args.locations]

     # iterate through, loading all the manifests from all the locations.
     for n_files, loc in enumerate(args.locations):
         notify(f"Loading signature information from {loc}.")
         if n_files % 100 == 0:
-            notify(f'... loaded {len(collected_mf)} sigs from {n_files} files')
+            notify(f"... loaded {len(collected_mf)} sigs from {n_files} files")

         idx = sourmash.load_file_as_index(loc)

         if idx.manifest is None and require_manifest:
             error(f"ERROR on location '{loc}'")
-            error(f"sig collect requires a manifest by default, but no manifest present.")
+            error(
+                "sig collect requires a manifest by default, but no manifest present."
+            )
             error("specify --no-require-manifest to dynamically generate one.")
             sys.exit(-1)

         mf = sourmash_args.get_manifest(idx)

-        rows = []
         for row in mf.rows:
-            row['internal_location'] = loc
+            row["internal_location"] = loc
             collected_mf.add_row(row)

-    if args.manifest_format == 'csv':
-        collected_mf.write_to_filename(args.output, database_format='csv',
-                                       ok_if_exists=args.merge_previous)
+    if args.manifest_format == "csv":
+        collected_mf.write_to_filename(
+            args.output, database_format="csv", ok_if_exists=args.merge_previous
+        )
     else:
         collected_mf.close()

@@ -1522,9 +1619,9 @@
 def main(arglist=None):
     args = sourmash.cli.get_parser().parse_args(arglist)
     submod = getattr(sourmash.cli.sig, args.subcmd)
-    mainmethod = getattr(submod, 'main')
+    mainmethod = getattr(submod, "main")
     return mainmethod(args)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main(sys.argv)
diff --git a/src/sourmash/sig/grep.py b/src/sourmash/sig/grep.py
index e131ca501e..cfdc857779 100644
--- a/src/sourmash/sig/grep.py
+++ b/src/sourmash/sig/grep.py
@@ -28,9 +28,13 @@ def main(args):
         pattern = re.compile(pattern)

     if args.invert_match:
-        search_pattern = lambda vals: all(not pattern.search(val) for val in vals)
+
+        def search_pattern(vals):
+            return all(not pattern.search(val) for val in vals)
     else:
-        search_pattern = lambda vals: any(pattern.search(val) for val in vals)
+
+        def search_pattern(vals):
+            return any(pattern.search(val) for val in vals)

     # require manifests?
     require_manifest = True
@@ -63,28 +67,27 @@ def main(args):
     # start loading!
     total_rows_examined = 0
     for filename in args.signatures:
-        idx = sourmash_args.load_file_as_index(filename,
-                                               yield_all_files=args.force)
+        idx = sourmash_args.load_file_as_index(filename, yield_all_files=args.force)

-        idx = idx.select(ksize=args.ksize,
-                         moltype=moltype,
-                         picklist=picklist)
+        idx = idx.select(ksize=args.ksize, moltype=moltype, picklist=picklist)

         # get (and maybe generate) the manifest.
         manifest = idx.manifest
         if manifest is None:
             if require_manifest:
                 error(f"ERROR on filename '{filename}'.")
-                error("sig grep requires a manifest by default, but no manifest present.")
+                error(
+                    "sig grep requires a manifest by default, but no manifest present."
+ ) error("specify --no-require-manifest to dynamically generate one.") sys.exit(-1) else: - manifest = sourmash_args.get_manifest(idx, - require=False) + manifest = sourmash_args.get_manifest(idx, require=False) # find all matching rows. - sub_manifest = manifest.filter_on_columns(search_pattern, - ["name", "filename", "md5"]) + sub_manifest = manifest.filter_on_columns( + search_pattern, ["name", "filename", "md5"] + ) total_rows_examined += len(manifest) # write out to CSV, if desired. @@ -119,7 +122,9 @@ def main(args): notify(f"loaded {total_rows_examined} total that matched ksize & molecule type") if save_sigs: - notify(f"extracted {len(save_sigs)} signatures from {len(args.signatures)} file(s)") + notify( + f"extracted {len(save_sigs)} signatures from {len(args.signatures)} file(s)" + ) save_sigs.close() else: error("no matching signatures found!") diff --git a/src/sourmash/signature.py b/src/sourmash/signature.py index 1fd34d35e6..3faa5e856b 100644 --- a/src/sourmash/signature.py +++ b/src/sourmash/signature.py @@ -40,12 +40,9 @@ def __init__(self, minhash, name="", filename=""): self.minhash = minhash - @property def minhash(self): - return FrozenMinHash._from_objptr( - self._methodcall(lib.signature_first_mh) - ) + return FrozenMinHash._from_objptr(self._methodcall(lib.signature_first_mh)) @minhash.setter def minhash(self, value): @@ -62,11 +59,11 @@ def __repr__(self): name = self.name md5pref = self.md5sum()[:8] if name == md5pref: - return "SourmashSignature({})".format(md5pref) - else: # name != md5pref: - return "SourmashSignature('{}', {})".format(name, md5pref) + return f"SourmashSignature({md5pref})" + else: # name != md5pref: + return f"SourmashSignature('{name}', {md5pref})" - #def minhashes(self): + # def minhashes(self): # size = ffi.new("uintptr_t *") # mhs_ptr = self._methodcall(lib.signature_get_mhs, size) # size = ffi.unpack(size, 1)[0] @@ -134,40 +131,77 @@ def _display_name(self, max_length=0): def similarity(self, other, ignore_abundance=False, downsample=False): "Compute similarity with the other signature." - return self.minhash.similarity(other.minhash, - ignore_abundance=ignore_abundance, - downsample=downsample) + return self.minhash.similarity( + other.minhash, ignore_abundance=ignore_abundance, downsample=downsample + ) def jaccard(self, other): "Compute Jaccard similarity with the other MinHash signature." - return self.minhash.similarity(other.minhash, ignore_abundance=True, - downsample=False) + return self.minhash.similarity( + other.minhash, ignore_abundance=True, downsample=False + ) - def jaccard_ani(self, other, *, downsample=False, jaccard=None, prob_threshold=1e-3, err_threshold=1e-4): + def jaccard_ani( + self, + other, + *, + downsample=False, + jaccard=None, + prob_threshold=1e-3, + err_threshold=1e-4, + ): "Use jaccard to estimate ANI between two FracMinHash signatures." - return self.minhash.jaccard_ani(other.minhash, downsample=downsample, - jaccard=jaccard, prob_threshold=prob_threshold, - err_threshold=err_threshold) + return self.minhash.jaccard_ani( + other.minhash, + downsample=downsample, + jaccard=jaccard, + prob_threshold=prob_threshold, + err_threshold=err_threshold, + ) def contained_by(self, other, downsample=False): "Compute containment by the other signature. Note: ignores abundance." 
        return self.minhash.contained_by(other.minhash, downsample=downsample)

-    def containment_ani(self, other, *, downsample=False, containment=None, confidence=0.95, estimate_ci=False):
+    def containment_ani(
+        self,
+        other,
+        *,
+        downsample=False,
+        containment=None,
+        confidence=0.95,
+        estimate_ci=False,
+    ):
         "Use containment to estimate ANI between two FracMinHash signatures."
-        return self.minhash.containment_ani(other.minhash, downsample=downsample,
-                                            containment=containment, confidence=confidence,
-                                            estimate_ci=estimate_ci)
+        return self.minhash.containment_ani(
+            other.minhash,
+            downsample=downsample,
+            containment=containment,
+            confidence=confidence,
+            estimate_ci=estimate_ci,
+        )

     def max_containment(self, other, downsample=False):
         "Compute max containment w/other signature. Note: ignores abundance."
         return self.minhash.max_containment(other.minhash, downsample=downsample)

-    def max_containment_ani(self, other, *, downsample=False, max_containment=None, confidence=0.95, estimate_ci=False):
+    def max_containment_ani(
+        self,
+        other,
+        *,
+        downsample=False,
+        max_containment=None,
+        confidence=0.95,
+        estimate_ci=False,
+    ):
         "Use max containment to estimate ANI between two FracMinHash signatures."
-        return self.minhash.max_containment_ani(other.minhash, downsample=downsample,
-                                                max_containment=max_containment, confidence=confidence,
-                                                estimate_ci=estimate_ci)
+        return self.minhash.max_containment_ani(
+            other.minhash,
+            downsample=downsample,
+            max_containment=max_containment,
+            confidence=confidence,
+            estimate_ci=estimate_ci,
+        )

     def avg_containment(self, other, downsample=False):
         """
@@ -218,11 +252,7 @@ def __setstate__(self, tup):
     def __reduce__(self):
         return (
             SourmashSignature,
-            (
-                self.minhash,
-                self.name,
-                self.filename
-            ),
+            (self.minhash, self.name, self.filename),
         )

     def __copy__(self):
@@ -279,6 +309,7 @@ def add_protein(self, sequence):

     def __copy__(self):
         return self
+
     copy = __copy__

     def to_frozen(self):
@@ -325,7 +356,9 @@ def _detect_input_type(data):
       - Compressed memory buffers
       - filename
     """
-    if hasattr(data, 'read') or hasattr(data, "fileno") or hasattr(data, "mode"): # file-like object
+    if (
+        hasattr(data, "read") or hasattr(data, "fileno") or hasattr(data, "mode")
+    ):  # file-like object
         return SigInput.FILE_LIKE
     elif hasattr(data, "find"):  # check if it is uncompressed sig
         try:
@@ -334,7 +367,7 @@ def _detect_input_type(data):
         except TypeError:
             if data.find(b"sourmash_signature") > 0:
                 return SigInput.BUFFER
-            elif data.startswith(b'\x1F\x8B'):  # gzip compressed
+            elif data.startswith(b"\x1F\x8B"):  # gzip compressed
                 return SigInput.BUFFER

     try:
@@ -347,7 +380,11 @@ def _detect_input_type(data):


 def load_signatures(
-    data, ksize=None, select_moltype=None, ignore_md5sum=False, do_raise=False,
+    data,
+    ksize=None,
+    select_moltype=None,
+    ignore_md5sum=False,
+    do_raise=False,
 ):
     """Load a JSON string with signatures into classes.

@@ -374,14 +411,18 @@ def load_signatures(
     input_type = _detect_input_type(data)
     if input_type == SigInput.UNKNOWN:
         if do_raise:
-            raise ValueError("Error in parsing signature; quitting. Cannot open file or invalid signature")
+            raise ValueError(
+                "Error in parsing signature; quitting. Cannot open file or invalid signature"
+            )
         return

     size = ffi.new("uintptr_t *")

     try:
         if input_type == SigInput.FILE_LIKE:
-            if hasattr(data, "mode") and "t" in data.mode: # need to reopen handler as binary
+            if (
+                hasattr(data, "mode") and "t" in data.mode
+            ):  # need to reopen handler as binary
                 data = data.buffer

             buf = data.read()
@@ -423,7 +464,7 @@ def load_signatures(

             for sig in sigs:
                 yield sig.to_frozen()
-    except Exception as e:
+    except Exception:
         if do_raise:
             raise

@@ -461,8 +502,9 @@ def save_signatures(siglist, fp=None, compression=0):
     size = ffi.new("uintptr_t *")

     # save signature into a string (potentially compressed)
-    rawbuf = rustcall(lib.signatures_save_buffer, siglist_c, len(collected),
-                      compression, size)
+    rawbuf = rustcall(
+        lib.signatures_save_buffer, siglist_c, len(collected), compression, size
+    )
     size = size[0]

     # associate a finalizer with rawbuf so that it gets freed
@@ -472,11 +514,11 @@ def save_signatures(siglist, fp=None, compression=0):
     else:
         result = ffi.string(buf, size)

-    if fp is None: # return string
+    if fp is None:  # return string
         return result
     else:
-        try: # write to file
+        try:  # write to file
             fp.write(result)
         except TypeError:
-            fp.write(result.decode('utf-8'))
+            fp.write(result.decode("utf-8"))

     return None
diff --git a/src/sourmash/sketchcomparison.py b/src/sourmash/sketchcomparison.py
index db36d20ac3..e80013edaa 100644
--- a/src/sourmash/sketchcomparison.py
+++ b/src/sourmash/sketchcomparison.py
@@ -4,14 +4,16 @@
 import numpy as np
 from dataclasses import dataclass

-from .signature import MinHash
+from .minhash import MinHash
+

 @dataclass
 class BaseMinHashComparison:
     """Class for standard comparison between two MinHashes"""
+
     mh1: MinHash
     mh2: MinHash
-    ignore_abundance: bool = False # optionally ignore abundances
+    ignore_abundance: bool = False  # optionally ignore abundances
     jaccard_ani_untrustworthy: bool = False

     def downsample_and_handle_ignore_abundance(self, cmp_num=None, cmp_scaled=None):
@@ -34,11 +36,15 @@ def downsample_and_handle_ignore_abundance(self, cmp_num=None, cmp_scaled=None):
             raise ValueError("Error: must pass in a comparison scaled or num value.")

     def check_compatibility_and_downsample(self, cmp_num=None, cmp_scaled=None):
-        if not any([(self.mh1.num and self.mh2.num), (self.mh1.scaled and self.mh2.scaled)]):
+        if not any(
+            [(self.mh1.num and self.mh2.num), (self.mh1.scaled and self.mh2.scaled)]
+        ):
             raise TypeError("Error: Both sketches must be 'num' or 'scaled'.")

-        #need to downsample first because is_compatible checks scaled (though does not check num)
-        self.downsample_and_handle_ignore_abundance(cmp_num=cmp_num, cmp_scaled=cmp_scaled)
+        # need to downsample first because is_compatible checks scaled (though does not check num)
+        self.downsample_and_handle_ignore_abundance(
+            cmp_num=cmp_num, cmp_scaled=cmp_scaled
+        )
         if not self.mh1_cmp.is_compatible(self.mh2_cmp):
             raise TypeError("Error: Cannot compare incompatible sketches.")
         self.ksize = self.mh1.ksize
@@ -69,30 +75,34 @@ def angular_similarity(self):
     @property
     def cosine_similarity(self):
         return self.angular_similarity
-    
+

 @dataclass
 class NumMinHashComparison(BaseMinHashComparison):
     """Class for standard comparison between two num minhashes"""
+
     cmp_num: int = None

     def __post_init__(self):
         "Initialize NumMinHashComparison using values from provided MinHashes"
-        if self.cmp_num is None: # record the num we're doing this comparison on
+        if self.cmp_num is None:  # record the num we're doing this comparison on
             self.cmp_num = min(self.mh1.num, self.mh2.num)
self.check_compatibility_and_downsample(cmp_num=self.cmp_num) @property def size_may_be_inaccurate(self): - return False # not using size estimation, can ignore + return False # not using size estimation, can ignore + @dataclass class FracMinHashComparison(BaseMinHashComparison): """Class for standard comparison between two scaled minhashes""" - cmp_scaled: int = None # optionally force scaled value for this comparison + + cmp_scaled: int = None # optionally force scaled value for this comparison threshold_bp: int = 0 estimate_ani_ci: bool = False ani_confidence: float = 0.95 -# pfn_threshold: float = 1e-3 + # pfn_threshold: float = 1e-3 def __post_init__(self): "Initialize ScaledComparison using values from provided FracMinHashes" @@ -121,19 +131,23 @@ def total_unique_intersect_hashes(self): To get true bp estimates, we would need to add `(k-1)`. However, this complicates the iterative gather algorithm, so let's stick with hashes. """ - return len(self.intersect_mh) * self.cmp_scaled # + (ksize-1) #for bp estimation + return ( + len(self.intersect_mh) * self.cmp_scaled + ) # + (ksize-1) #for bp estimation @property def mh1_containment_in_mh2(self): return self.mh1_cmp.contained_by(self.mh2_cmp) - def estimate_ani_from_mh1_containment_in_mh2(self, containment = None): + def estimate_ani_from_mh1_containment_in_mh2(self, containment=None): # build result once - m1_cani = self.mh1_cmp.containment_ani(self.mh2_cmp, - containment=containment, - confidence=self.ani_confidence, - estimate_ci=self.estimate_ani_ci) -# prob_threshold=self.pfn_threshold) + m1_cani = self.mh1_cmp.containment_ani( + self.mh2_cmp, + containment=containment, + confidence=self.ani_confidence, + estimate_ci=self.estimate_ani_ci, + ) + # prob_threshold=self.pfn_threshold) # propagate params self.ani_from_mh1_containment_in_mh2 = m1_cani.ani if m1_cani.p_exceeds_threshold: @@ -148,28 +162,32 @@ def mh2_containment_in_mh1(self): return self.mh2_cmp.contained_by(self.mh1_cmp) def estimate_ani_from_mh2_containment_in_mh1(self, containment=None): - m2_cani = self.mh2_cmp.containment_ani(self.mh1_cmp, - containment=containment, - confidence=self.ani_confidence, - estimate_ci=self.estimate_ani_ci) -# prob_threshold=self.pfn_threshold) + m2_cani = self.mh2_cmp.containment_ani( + self.mh1_cmp, + containment=containment, + confidence=self.ani_confidence, + estimate_ci=self.estimate_ani_ci, + ) + # prob_threshold=self.pfn_threshold) self.ani_from_mh2_containment_in_mh1 = m2_cani.ani if m2_cani.p_exceeds_threshold: self.potential_false_negative = True if self.estimate_ani_ci: self.ani_from_mh2_containment_in_mh1_low = m2_cani.ani_low self.ani_from_mh2_containment_in_mh1_high = m2_cani.ani_high - + @property def max_containment(self): return self.mh1_cmp.max_containment(self.mh2_cmp) def estimate_max_containment_ani(self, max_containment=None): - mc_ani_info = self.mh1_cmp.max_containment_ani(self.mh2_cmp, - max_containment=max_containment, - confidence=self.ani_confidence, - estimate_ci=self.estimate_ani_ci) -# prob_threshold=self.pfn_threshold) + mc_ani_info = self.mh1_cmp.max_containment_ani( + self.mh2_cmp, + max_containment=max_containment, + confidence=self.ani_confidence, + estimate_ci=self.estimate_ani_ci, + ) + # prob_threshold=self.pfn_threshold) # propagate params self.max_containment_ani = mc_ani_info.ani if mc_ani_info.p_exceeds_threshold: @@ -187,23 +205,41 @@ def avg_containment_ani(self): "Returns single average_containment_ani value. Sets self.potential_false_negative internally." 
self.estimate_ani_from_mh1_containment_in_mh2() self.estimate_ani_from_mh2_containment_in_mh1() - if any([self.ani_from_mh1_containment_in_mh2 is None, self.ani_from_mh2_containment_in_mh1 is None]): + if any( + [ + self.ani_from_mh1_containment_in_mh2 is None, + self.ani_from_mh2_containment_in_mh1 is None, + ] + ): return None else: - return (self.ani_from_mh1_containment_in_mh2 + self.ani_from_mh2_containment_in_mh1)/2 + return ( + self.ani_from_mh1_containment_in_mh2 + + self.ani_from_mh2_containment_in_mh1 + ) / 2 def estimate_all_containment_ani(self): "Estimate all containment ANI values." self.estimate_ani_from_mh1_containment_in_mh2() self.estimate_ani_from_mh2_containment_in_mh1() - if any([self.ani_from_mh1_containment_in_mh2 is None, self.ani_from_mh2_containment_in_mh1 is None]): -# self.estimate_max_containment_ani() + if any( + [ + self.ani_from_mh1_containment_in_mh2 is None, + self.ani_from_mh2_containment_in_mh1 is None, + ] + ): + # self.estimate_max_containment_ani() self.max_containment_ani = None else: - self.max_containment_ani = max([self.ani_from_mh1_containment_in_mh2, self.ani_from_mh2_containment_in_mh1]) + self.max_containment_ani = max( + [ + self.ani_from_mh1_containment_in_mh2, + self.ani_from_mh2_containment_in_mh1, + ] + ) def weighted_intersection(self, from_mh=None, from_abundD={}): - # map abundances to all intersection hashes. + # map abundances to all intersection hashes. abund_mh = self.intersect_mh.copy_and_clear() abund_mh.track_abundance = True # if from_mh is provided, it takes precedence over from_abund dict @@ -211,7 +247,7 @@ def weighted_intersection(self, from_mh=None, from_abundD={}): from_abundD = from_mh.hashes if from_abundD: # this sets any hash not present in abundD to 1. Is that desired? Or should we return 0? - abunds = {k: from_abundD.get(k, 1) for k in self.intersect_mh.hashes } + abunds = {k: from_abundD.get(k, 1) for k in self.intersect_mh.hashes} abund_mh.set_abundances(abunds) return abund_mh # if no abundances are passed in, return intersect_mh diff --git a/src/sourmash/sourmash_args.py b/src/sourmash/sourmash_args.py index 8b149d7d1d..fdbc0e4cf6 100644 --- a/src/sourmash/sourmash_args.py +++ b/src/sourmash/sourmash_args.py @@ -51,8 +51,7 @@ from .index import LinearIndex from .picklist import SignaturePicklist, PickStyle from .manifest import CollectionManifest -from .save_load import (SaveSignaturesToLocation, load_file_as_index, - _load_database) +from .save_load import SaveSignaturesToLocation, load_file_as_index, _load_database DEFAULT_LOAD_K = 31 @@ -64,9 +63,9 @@ def check_scaled_bounds(arg): if f < 0: raise argparse.ArgumentTypeError("ERROR: scaled value must be positive") if f < 100: - notify('WARNING: scaled value should be >= 100. Continuing anyway.') + notify("WARNING: scaled value should be >= 100. Continuing anyway.") if f > 1e6: - notify('WARNING: scaled value should be <= 1e6. Continuing anyway.') + notify("WARNING: scaled value should be <= 1e6. Continuing anyway.") return f @@ -76,18 +75,18 @@ def check_num_bounds(arg): if f < 0: raise argparse.ArgumentTypeError("ERROR: num value must be positive") if f < 50: - notify('WARNING: num value should be >= 50. Continuing anyway.') + notify("WARNING: num value should be >= 50. Continuing anyway.") if f > 50000: - notify('WARNING: num value should be <= 50000. Continuing anyway.') + notify("WARNING: num value should be <= 50000. 
Continuing anyway.") return f def get_moltype(sig, require=False): mh = sig.minhash - if mh.moltype in ('DNA', 'dayhoff', 'hp', 'protein'): + if mh.moltype in ("DNA", "dayhoff", "hp", "protein"): moltype = mh.moltype else: - raise ValueError('unknown molecule type for sig {}'.format(sig)) + raise ValueError(f"unknown molecule type for sig {sig}") return moltype @@ -97,20 +96,22 @@ def calculate_moltype(args, default=None): n = 0 if args.dna: - moltype = 'DNA' + moltype = "DNA" n += 1 if args.dayhoff: - moltype = 'dayhoff' + moltype = "dayhoff" n += 1 if args.hp: - moltype = 'hp' + moltype = "hp" n += 1 if args.protein: - moltype = 'protein' + moltype = "protein" n += 1 if n > 1: - error("cannot specify more than one of --dna/--rna/--nucleotide/--protein/--hp/--dayhoff") + error( + "cannot specify more than one of --dna/--rna/--nucleotide/--protein/--hp/--dayhoff" + ) sys.exit(-1) return moltype @@ -123,7 +124,9 @@ def load_picklist(args): try: picklist = SignaturePicklist.from_picklist_args(args.picklist) - notify(f"picking column '{picklist.column_name}' of type '{picklist.coltype}' from '{picklist.pickfile}'") + notify( + f"picking column '{picklist.column_name}' of type '{picklist.coltype}' from '{picklist.pickfile}'" + ) n_empty_val, dup_vals = picklist.load() except ValueError as exc: @@ -133,19 +136,27 @@ def load_picklist(args): notify(f"loaded {len(picklist.pickset)} distinct values into picklist.") if n_empty_val: - notify(f"WARNING: {n_empty_val} empty values in column '{picklist.column_name}' in picklist file") + notify( + f"WARNING: {n_empty_val} empty values in column '{picklist.column_name}' in picklist file" + ) if dup_vals: - notify(f"WARNING: {len(dup_vals)} values in picklist column '{picklist.column_name}' were not distinct") + notify( + f"WARNING: {len(dup_vals)} values in picklist column '{picklist.column_name}' were not distinct" + ) return picklist def report_picklist(args, picklist): if picklist.pickstyle == PickStyle.INCLUDE: - notify(f"for given picklist, found {len(picklist.found)} matches to {len(picklist.pickset)} distinct values") + notify( + f"for given picklist, found {len(picklist.found)} matches to {len(picklist.pickset)} distinct values" + ) n_missing = len(picklist.pickset - picklist.found) elif picklist.pickstyle == PickStyle.EXCLUDE: - notify(f"for given picklist, found {len(picklist.found)} matches by excluding {len(picklist.pickset)} distinct values") + notify( + f"for given picklist, found {len(picklist.found)} matches by excluding {len(picklist.pickset)} distinct values" + ) n_missing = 0 if n_missing: notify(f"WARNING: {n_missing} missing picklist values.") @@ -157,19 +168,27 @@ def report_picklist(args, picklist): def load_include_exclude_db_patterns(args): if args.picklist and (args.include_db_pattern or args.exclude_db_pattern): - error("ERROR: --picklist and --include-db-pattern/--exclude cannot be used together.") + error( + "ERROR: --picklist and --include-db-pattern/--exclude cannot be used together." + ) sys.exit(-1) if args.include_db_pattern and args.exclude_db_pattern: - error("ERROR: --include-db-pattern and --exclude-db-pattern cannot be used together.") + error( + "ERROR: --include-db-pattern and --exclude-db-pattern cannot be used together." 
+ ) sys.exit(-1) if args.include_db_pattern: pattern = re.compile(args.include_db_pattern, re.IGNORECASE) - search_pattern = lambda vals: any(pattern.search(val) for val in vals) + + def search_pattern(vals): + return any(pattern.search(val) for val in vals) elif args.exclude_db_pattern: pattern = re.compile(args.exclude_db_pattern, re.IGNORECASE) - search_pattern = lambda vals: all(not pattern.search(val) for val in vals) + + def search_pattern(vals): + return all(not pattern.search(val) for val in vals) else: search_pattern = None @@ -187,8 +206,7 @@ def apply_picklist_and_pattern(db, picklist, pattern): error("--include-db-pattern/--exclude-db-pattern require a manifest.") sys.exit(-1) - manifest = manifest.filter_on_columns(pattern, - ["name", "filename", "md5"]) + manifest = manifest.filter_on_columns(pattern, ["name", "filename", "md5"]) pattern_picklist = manifest.to_picklist() db = db.select(picklist=pattern_picklist) @@ -202,8 +220,9 @@ def load_query_signature(filename, ksize, select_moltype, select_md5=None): and indexed databases. """ try: - sl = load_file_as_signatures(filename, ksize=ksize, - select_moltype=select_moltype) + sl = load_file_as_signatures( + filename, ksize=ksize, select_moltype=select_moltype + ) sl = list(sl) except (OSError, ValueError): error(f"Cannot open query file '{filename}'") @@ -225,21 +244,21 @@ def load_query_signature(filename, ksize, select_moltype, select_md5=None): sl = [found_sig] if len(sl) and ksize is None: - ksizes = set([ ss.minhash.ksize for ss in sl ]) + ksizes = set([ss.minhash.ksize for ss in sl]) if len(ksizes) == 1: ksize = ksizes.pop() - sl = [ ss for ss in sl if ss.minhash.ksize == ksize ] - notify(f'select query k={ksize} automatically.') + sl = [ss for ss in sl if ss.minhash.ksize == ksize] + notify(f"select query k={ksize} automatically.") elif DEFAULT_LOAD_K in ksizes: - sl = [ ss for ss in sl if ss.minhash.ksize == DEFAULT_LOAD_K ] - notify(f'selecting default query k={DEFAULT_LOAD_K}.') + sl = [ss for ss in sl if ss.minhash.ksize == DEFAULT_LOAD_K] + notify(f"selecting default query k={DEFAULT_LOAD_K}.") elif ksize: - notify(f'selecting specified query k={ksize}') + notify(f"selecting specified query k={ksize}") if len(sl) != 1: error(f"When loading query from '{filename}'", filename) - error(f'{len(sl)} signatures matching ksize and molecule type;') - error('need exactly one. Specify --ksize or --dna, --rna, or --protein.') + error(f"{len(sl)} signatures matching ksize and molecule type;") + error("need exactly one. Specify --ksize or --dna, --rna, or --protein.") sys.exit(-1) return sl[0] @@ -259,7 +278,7 @@ def traverse_find_sigs(filenames, yield_all_files=False): If 'yield_all_files' is True, this will return _all_ files (but not directories). """ - endings = ('.sig', '.sig.gz') + endings = (".sig", ".sig.gz") for filename in filenames: # check for files in filenames: if os.path.isfile(filename): @@ -275,9 +294,16 @@ def traverse_find_sigs(filenames, yield_all_files=False): yield fullname -def load_dbs_and_sigs(filenames, query, is_similarity_query, *, - cache_size=None, picklist=None, pattern=None, - fail_on_empty_database=False): +def load_dbs_and_sigs( + filenames, + query, + is_similarity_query, + *, + cache_size=None, + picklist=None, + pattern=None, + fail_on_empty_database=False, +): """ Load one or more Index objects to search - databases, etc. 
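
For orientation, a sketch of how load_dbs_and_sigs is typically invoked, using the load_query_signature helper defined above (file and directory names are hypothetical):

    from sourmash.sourmash_args import load_query_signature, load_dbs_and_sigs

    query = load_query_signature("query.sig", ksize=31, select_moltype="DNA")
    databases = load_dbs_and_sigs(
        ["gtdb.sbt.zip", "extra-sigs/"],  # hypothetical search targets
        query,
        is_similarity_query=False,  # containment-style search
        fail_on_empty_database=True,
    )
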
@@ -294,7 +320,7 @@ def load_dbs_and_sigs(filenames, query, is_similarity_query, *, total_signatures_loaded = 0 sum_signatures_after_select = 0 for filename in filenames: - notify(f"loading from '{filename}'...", end='\r') + notify(f"loading from '{filename}'...", end="\r") try: db = _load_database(filename, False, cache_size=cache_size) @@ -308,11 +334,13 @@ def load_dbs_and_sigs(filenames, query, is_similarity_query, *, # get compatible signatures - moltype/ksize/num/scaled try: - db = db.select(moltype=query_mh.moltype, - ksize=query_mh.ksize, - num=query_mh.num, - scaled=query_mh.scaled, - containment=containment) + db = db.select( + moltype=query_mh.moltype, + ksize=query_mh.ksize, + num=query_mh.num, + scaled=query_mh.scaled, + containment=containment, + ) except ValueError as exc: # incompatible collection specified! notify(f"ERROR: cannot use '{filename}' for this query.") @@ -337,9 +365,13 @@ def load_dbs_and_sigs(filenames, query, is_similarity_query, *, # display num loaded/num selected notify("--") - notify(f"loaded {total_signatures_loaded} total signatures from {len(databases)} locations.") - notify(f"after selecting signatures compatible with search, {sum_signatures_after_select} remain.") - print('') + notify( + f"loaded {total_signatures_loaded} total signatures from {len(databases)} locations." + ) + notify( + f"after selecting signatures compatible with search, {sum_signatures_after_select} remain." + ) + print("") return databases @@ -347,15 +379,17 @@ def load_dbs_and_sigs(filenames, query, is_similarity_query, *, def load_pathlist_from_file(filename): "Load a list-of-files text file." try: - with open(filename, 'rt') as fp: - file_list = [ x.rstrip('\r\n') for x in fp ] + with open(filename) as fp: + file_list = [x.rstrip("\r\n") for x in fp] file_list = set(file_list) if not file_list: raise ValueError("pathlist is empty") for checkfile in file_list: if not os.path.exists(checkfile): - raise ValueError(f"file '{checkfile}' inside the pathlist does not exist") - except IOError: + raise ValueError( + f"file '{checkfile}' inside the pathlist does not exist" + ) + except OSError: raise ValueError(f"pathlist file '{filename}' does not exist") except OSError: raise ValueError(f"cannot open file '{filename}'") @@ -385,7 +419,8 @@ class FileOutput: will properly handle no argument or '-' as sys.stdout. """ - def __init__(self, filename, mode='wt', *, newline=None, encoding='utf-8'): + + def __init__(self, filename, mode="wt", *, newline=None, encoding="utf-8"): self.filename = filename self.mode = mode self.fp = None @@ -393,14 +428,15 @@ def __init__(self, filename, mode='wt', *, newline=None, encoding='utf-8'): self.encoding = encoding def open(self): - if self.filename == '-' or self.filename is None: + if self.filename == "-" or self.filename is None: return sys.stdout - self.fp = open(self.filename, self.mode, newline=self.newline, - encoding=self.encoding) + self.fp = open( + self.filename, self.mode, newline=self.newline, encoding=self.encoding + ) return self.fp def close(self): - if self.fp is not None: # in case of stdout + if self.fp is not None: # in case of stdout self.fp.close() def __enter__(self): @@ -435,17 +471,18 @@ class FileOutputCSV(FileOutput): will properly handle no argument or '-' as sys.stdout. 
""" + def __init__(self, filename): self.filename = filename self.fp = None def open(self): - if self.filename == '-' or self.filename is None: + if self.filename == "-" or self.filename is None: return sys.stdout - if self.filename.endswith('.gz'): - self.fp = gzip.open(self.filename, 'wt', newline='') + if self.filename.endswith(".gz"): + self.fp = gzip.open(self.filename, "wt", newline="") else: - self.fp = open(self.filename, 'w', newline='') + self.fp = open(self.filename, "w", newline="") return self.fp @@ -457,38 +494,44 @@ class _DictReader_with_version: The version is stored as a 2-tuple in the 'version_info' attribute. """ - def __init__(self, textfp, *, delimiter=','): + + def __init__(self, textfp, *, delimiter=","): self.version_info = [] # is there a '#' in the raw buffer pos 0? ch = textfp.buffer.peek(1) try: - ch = ch.decode('utf-8') + ch = ch.decode("utf-8") except UnicodeDecodeError: raise csv.Error("unable to read CSV file") # yes - read a line from the text buffer => parse - if ch.startswith('#'): + if ch.startswith("#"): line = textfp.readline() - assert line.startswith('# '), line + assert line.startswith("# "), line # note, this can set version_info to lots of different things. # revisit later, I guess. CTB. - self.version_info = line[2:].strip().split(': ', 2) + self.version_info = line[2:].strip().split(": ", 2) # build a DictReader from the remaining stream self.reader = csv.DictReader(textfp, delimiter=delimiter) self.fieldnames = self.reader.fieldnames def __iter__(self): - for row in self.reader: - yield row + yield from self.reader @contextlib.contextmanager -def FileInputCSV(filename, *, encoding='utf-8', default_csv_name=None, - zipfile_obj=None, delimiter=','): +def FileInputCSV( + filename, + *, + encoding="utf-8", + default_csv_name=None, + zipfile_obj=None, + delimiter=",", +): """A context manager for reading in CSV files in gzip, zip or text format. Assumes comma delimiter, and uses csv.DictReader. @@ -513,24 +556,20 @@ def FileInputCSV(filename, *, encoding='utf-8', default_csv_name=None, try: zi = zipfile_obj.getinfo(default_csv_name) with zipfile_obj.open(zi) as fp: - textfp = TextIOWrapper(fp, - encoding=encoding, - newline="") + textfp = TextIOWrapper(fp, encoding=encoding, newline="") r = _DictReader_with_version(textfp, delimiter=delimiter) yield r except (zipfile.BadZipFile, KeyError): - pass # uh oh, we were given a zipfile_obj and it FAILED. + pass # uh oh, we were given a zipfile_obj and it FAILED. # no matter what, if given zipfile_obj don't try .gz or regular csv return else: try: - with zipfile.ZipFile(filename, 'r') as zip_fp: + with zipfile.ZipFile(filename, "r") as zip_fp: zi = zip_fp.getinfo(default_csv_name) with zip_fp.open(zi) as fp: - textfp = TextIOWrapper(fp, - encoding=encoding, - newline="") + textfp = TextIOWrapper(fp, encoding=encoding, newline="") r = _DictReader_with_version(textfp, delimiter=delimiter) yield r @@ -545,7 +584,7 @@ def FileInputCSV(filename, *, encoding='utf-8', default_csv_name=None, # ok, not a zip file - try .gz: try: with gzip.open(filename, "rt", newline="", encoding=encoding) as fp: - fp.buffer.peek(1) # force exception if not a gzip file + fp.buffer.peek(1) # force exception if not a gzip file r = _DictReader_with_version(fp, delimiter=delimiter) yield r return @@ -553,7 +592,7 @@ def FileInputCSV(filename, *, encoding='utf-8', default_csv_name=None, pass # neither zip nor gz; regular file! 
- with open(filename, 'rt', newline="", encoding=encoding) as fp: + with open(filename, newline="", encoding=encoding) as fp: r = _DictReader_with_version(fp, delimiter=delimiter) yield r @@ -569,6 +608,7 @@ class SignatureLoadingProgress: You can optionally notify of reading a file with `.notify(location)`. """ + def __init__(self, reporting_interval=10): self.n_sig = 0 self.interval = reporting_interval @@ -584,17 +624,19 @@ def short_notify(self, msg_template, *args, **kwargs): """ msg = msg_template.format(*args, **kwargs) - end = kwargs.get('end', '\n') + end = kwargs.get("end", "\n") w = self.screen_width if len(msg) > w: truncate_len = len(msg) - w + 3 - msg = '<<<' + msg[truncate_len:] + msg = "<<<" + msg[truncate_len:] notify(msg, end=end) def notify(self, location): - self.short_notify(f"...{self.n_sig} sigs so far. Now reading from file '{location}'", end='\r') + self.short_notify( + f"...{self.n_sig} sigs so far. Now reading from file '{location}'", end="\r" + ) def start_file(self, location, loader): n_this = 0 @@ -606,24 +648,35 @@ def start_file(self, location, loader): n_this += 1 n_total = n_before + n_this if n_this and n_total % self.interval == 0: - self.short_notify("...loading from '{}' / {} sigs total", - location, n_total, end='\r') + self.short_notify( + "...loading from '{}' / {} sigs total", + location, + n_total, + end="\r", + ) yield result except KeyboardInterrupt: # might as well nicely handle CTRL-C while we're at it! - notify('\n(CTRL-C received! quitting.)') + notify("\n(CTRL-C received! quitting.)") sys.exit(-1) finally: self.n_sig += n_this - self.short_notify(f"Loaded {n_this} sigs from '{location}'", - end='\r') + self.short_notify(f"Loaded {n_this} sigs from '{location}'", end="\r") -def load_many_signatures(locations, progress, *, yield_all_files=False, - ksize=None, moltype=None, picklist=None, force=False, - pattern=None): +def load_many_signatures( + locations, + progress, + *, + yield_all_files=False, + ksize=None, + moltype=None, + picklist=None, + force=False, + pattern=None, +): """ Load many signatures from multiple files, with progress indicators. @@ -648,11 +701,11 @@ def load_many_signatures(locations, progress, *, yield_all_files=False, loader = idx.signatures_with_location() # go! - n = 0 # count signatures loaded + n = 0 # count signatures loaded for sig, sigloc in progress.start_file(loc, loader): yield sig, sigloc n += 1 - notify(f"loaded {n} signatures from '{loc}'", end='\r') + notify(f"loaded {n} signatures from '{loc}'", end="\r") except ValueError as exc: # trap expected errors, and either power through or display + exit. if force: @@ -693,8 +746,9 @@ def get_manifest(idx, *, require=True, rebuild=False): # need to build one... 
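
A short sketch of pairing SignatureLoadingProgress with load_many_signatures, as defined above (locations are hypothetical):

    from sourmash.sourmash_args import (
        SignatureLoadingProgress,
        load_many_signatures,
    )

    progress = SignatureLoadingProgress(reporting_interval=10)
    for sig, location in load_many_signatures(
        ["sigs/", "more.sig.gz"],  # hypothetical locations
        progress,
        ksize=31,
        moltype="DNA",
    ):
        print(sig.name, location)
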
try: notify("Generating a manifest...") - m = CollectionManifest.create_manifest(idx._signatures_with_internal(), - include_signature=False) + m = CollectionManifest.create_manifest( + idx._signatures_with_internal(), include_signature=False + ) debug_literal("get_manifest: rebuilt manifest.") except NotImplementedError: if require: @@ -707,12 +761,17 @@ return m -def load_file_as_signatures(filename, *, select_moltype=None, ksize=None, - picklist=None, - yield_all_files=False, - progress=None, - pattern=None, - _use_manifest=True): +def load_file_as_signatures( + filename, + *, + select_moltype=None, + ksize=None, + picklist=None, + yield_all_files=False, + progress=None, + pattern=None, + _use_manifest=True, +): """Load 'filename' as a collection of signatures. Return an iterable. If 'filename' contains an SBT or LCA indexed database, or a regular diff --git a/src/sourmash/sqlite_utils.py b/src/sourmash/sqlite_utils.py index 2b7503a2d8..8efb754a23 100644 --- a/src/sourmash/sqlite_utils.py +++ b/src/sourmash/sqlite_utils.py @@ -31,13 +31,13 @@ def open_sqlite_db(filename): # check for the 'sourmash_internal' table. cursor = conn.cursor() try: - cursor.execute('SELECT DISTINCT key, value FROM sourmash_internal') + cursor.execute("SELECT DISTINCT key, value FROM sourmash_internal") except (sqlite3.OperationalError, sqlite3.DatabaseError): debug_literal("open_sqlite_db: cannot read sourmash_internal.") # is this a taxonomy DB? try: - cursor.execute('SELECT * FROM taxonomy LIMIT 1') + cursor.execute("SELECT * FROM taxonomy LIMIT 1") except (sqlite3.OperationalError, sqlite3.DatabaseError): debug_literal("open_sqlite_db: cannot read 'taxonomy', either.") return None @@ -49,12 +49,14 @@ def add_sourmash_internal(cursor, use_type, version): """ Add use_type/version to sourmash_internal table. """ - cursor.execute(""" + cursor.execute( + """ CREATE TABLE IF NOT EXISTS sourmash_internal ( key TEXT UNIQUE, value TEXT ) - """) + """ + ) d = get_sourmash_internal(cursor) @@ -62,18 +64,23 @@ if val is not None: # do version compatibility foo here? if version != val: - raise Exception(f"sqlite problem: for {use_type}, want version {version}, got version {val}") + raise Exception( + f"sqlite problem: for {use_type}, want version {version}, got version {val}" + ) else: - cursor.execute(""" + cursor.execute( + """ INSERT INTO sourmash_internal (key, value) VALUES (?, ?) - """, (use_type, version)) + """, + (use_type, version), + ) def get_sourmash_internal(cursor): """ Retrieve a key/value dictionary from sourmash_internal. """ - cursor.execute('SELECT DISTINCT key, value FROM sourmash_internal') + cursor.execute("SELECT DISTINCT key, value FROM sourmash_internal") d = dict(cursor) return d diff --git a/src/sourmash/tax/__main__.py b/src/sourmash/tax/__main__.py index b6ff3d9dd2..8e490ae545 100644 --- a/src/sourmash/tax/__main__.py +++ b/src/sourmash/tax/__main__.py @@ -13,9 +13,14 @@ from sourmash.logging import set_quiet, error, notify, print_results from . import tax_utils -from .tax_utils import MultiLineageDB, RankLineageInfo, LINLineageInfo, AnnotateTaxResult - -usage=''' +from .tax_utils import ( + MultiLineageDB, + RankLineageInfo, + LINLineageInfo, + AnnotateTaxResult, +) + +usage = """ sourmash taxonomy <command> [<args>] - manipulate/work with taxonomy information. or sourmash tax <command> [<args>] @@ -30,31 +35,32 @@ ** Use '-h' to get subcommand-specific help, e.g.
sourmash taxonomy metagenome -h -''' +""" # outfile utils _output_type_to_ext = { - 'csv_summary': '.summarized.csv', - 'classification': '.classifications.csv', - 'krona': '.krona.tsv', - 'lineage_summary': '.lineage_summary.tsv', - 'annotate': '.with-lineages.csv', - 'human': '.human.txt', - 'lineage_csv': '.lineage.csv', - 'kreport': ".kreport.txt", - 'lingroup': ".lingroup.tsv", - 'bioboxes': '.bioboxes.profile' - } - -def make_outfile(base, output_type, *, output_dir = ""): - limit_float_decimals=False + "csv_summary": ".summarized.csv", + "classification": ".classifications.csv", + "krona": ".krona.tsv", + "lineage_summary": ".lineage_summary.tsv", + "annotate": ".with-lineages.csv", + "human": ".human.txt", + "lineage_csv": ".lineage.csv", + "kreport": ".kreport.txt", + "lingroup": ".lingroup.tsv", + "bioboxes": ".bioboxes.profile", +} + + +def make_outfile(base, output_type, *, output_dir=""): + limit_float_decimals = False if base == "-": - limit_float_decimals=True + limit_float_decimals = True return base, limit_float_decimals ext = _output_type_to_ext[output_type] - fname = base+ext + fname = base + ext if output_dir: fname = os.path.join(output_dir, fname) notify(f"saving '{output_type}' output to '{fname}'.") @@ -70,50 +76,70 @@ def metagenome(args): # first, load taxonomic_assignments try: - tax_assign = MultiLineageDB.load(args.taxonomy_csv, - keep_full_identifiers=args.keep_full_identifiers, - keep_identifier_versions=args.keep_identifier_versions, - force=args.force, lins=args.lins) + tax_assign = MultiLineageDB.load( + args.taxonomy_csv, + keep_full_identifiers=args.keep_full_identifiers, + keep_identifier_versions=args.keep_identifier_versions, + force=args.force, + lins=args.lins, + ) available_ranks = tax_assign.available_ranks except ValueError as exc: error(f"ERROR: {str(exc)}") sys.exit(-1) if not tax_assign: - error(f'ERROR: No taxonomic assignments loaded from {",".join(args.taxonomy_csv)}. Exiting.') + error( + f'ERROR: No taxonomic assignments loaded from {",".join(args.taxonomy_csv)}. Exiting.' + ) sys.exit(-1) if args.rank and args.rank not in available_ranks: - error(f"ERROR: No taxonomic information provided for rank {args.rank}: cannot summarize at this rank") + error( + f"ERROR: No taxonomic information provided for rank {args.rank}: cannot summarize at this rank" + ) sys.exit(-1) # next, collect and load gather results - gather_csvs = tax_utils.collect_gather_csvs(args.gather_csv, from_file= args.from_file) + gather_csvs = tax_utils.collect_gather_csvs( + args.gather_csv, from_file=args.from_file + ) try: - query_gather_results = tax_utils.check_and_load_gather_csvs(gather_csvs, tax_assign, force=args.force, - fail_on_missing_taxonomy=args.fail_on_missing_taxonomy, - keep_full_identifiers=args.keep_full_identifiers, - keep_identifier_versions = args.keep_identifier_versions, - lins=args.lins, - ) + query_gather_results = tax_utils.check_and_load_gather_csvs( + gather_csvs, + tax_assign, + force=args.force, + fail_on_missing_taxonomy=args.fail_on_missing_taxonomy, + keep_full_identifiers=args.keep_full_identifiers, + keep_identifier_versions=args.keep_identifier_versions, + lins=args.lins, + ) except ValueError as exc: error(f"ERROR: {str(exc)}") sys.exit(-1) if not query_gather_results: - notify('No gather results loaded. Exiting.') + notify("No gather results loaded. 
Exiting.") sys.exit(-1) - single_query_output_formats = ['csv_summary', 'kreport'] + single_query_output_formats = ["csv_summary", "kreport"] desired_single_outputs = [] - if len(query_gather_results) > 1: # working with multiple queries - desired_single_outputs = [x for x in args.output_format if x in single_query_output_formats] + if len(query_gather_results) > 1: # working with multiple queries + desired_single_outputs = [ + x for x in args.output_format if x in single_query_output_formats + ] if desired_single_outputs: - notify(f"WARNING: found results for multiple gather queries. Can only output multi-query result formats: skipping {', '.join(desired_single_outputs)}") + notify( + f"WARNING: found results for multiple gather queries. Can only output multi-query result formats: skipping {', '.join(desired_single_outputs)}" + ) # remove single query outputs from output format - args.output_format = [x for x in args.output_format if x not in single_query_output_formats] - if not args.output_format: # or do we want to insert `human` here so we always report something? - error(f"ERROR: No output formats remaining.") + args.output_format = [ + x for x in args.output_format if x not in single_query_output_formats + ] + if ( + not args.output_format + ): # or do we want to insert `human` here so we always report something? + error("ERROR: No output formats remaining.") sys.exit(-1) # for each queryResult, actually summarize at rank, reporting any errors that occur. @@ -126,47 +152,66 @@ def metagenome(args): # write summarized output in human-readable format if "lineage_summary" in args.output_format: - lineage_outfile, limit_float = make_outfile(args.output_base, "lineage_summary", output_dir=args.output_dir) + lineage_outfile, limit_float = make_outfile( + args.output_base, "lineage_summary", output_dir=args.output_dir + ) ## aggregate by lineage by query - lineageD, query_names= tax_utils.aggregate_by_lineage_at_rank(query_gather_results=query_gather_results, - rank=args.rank, by_query=True) + lineageD, query_names = tax_utils.aggregate_by_lineage_at_rank( + query_gather_results=query_gather_results, rank=args.rank, by_query=True + ) with FileOutputCSV(lineage_outfile) as out_fp: - tax_utils.write_lineage_sample_frac(query_names, lineageD, out_fp, sep='\t') + tax_utils.write_lineage_sample_frac(query_names, lineageD, out_fp, sep="\t") # write summarized --> krona output tsv if "krona" in args.output_format: - krona_results, header = tax_utils.format_for_krona(query_gather_results, rank=args.rank) + krona_results, header = tax_utils.format_for_krona( + query_gather_results, rank=args.rank + ) - krona_outfile, limit_float = make_outfile(args.output_base, "krona", output_dir=args.output_dir) + krona_outfile, limit_float = make_outfile( + args.output_base, "krona", output_dir=args.output_dir + ) with FileOutputCSV(krona_outfile) as out_fp: tax_utils.write_krona(header, krona_results, out_fp) if "human" in args.output_format: - summary_outfile, limit_float = make_outfile(args.output_base, "human", output_dir=args.output_dir) + summary_outfile, limit_float = make_outfile( + args.output_base, "human", output_dir=args.output_dir + ) with FileOutput(summary_outfile) as out_fp: human_display_rank = args.rank or "species" if args.lins and not args.rank: - human_display_rank = query_gather_results[0].ranks[-1] # lowest rank + human_display_rank = query_gather_results[0].ranks[-1] # lowest rank - tax_utils.write_human_summary(query_gather_results, out_fp, human_display_rank) + 
tax_utils.write_human_summary( + query_gather_results, out_fp, human_display_rank + ) # write summarized output csv single_query_results = query_gather_results[0] if "csv_summary" in args.output_format: - summary_outfile, limit_float = make_outfile(args.output_base, "csv_summary", output_dir=args.output_dir) + summary_outfile, limit_float = make_outfile( + args.output_base, "csv_summary", output_dir=args.output_dir + ) with FileOutputCSV(summary_outfile) as out_fp: - tax_utils.write_summary(query_gather_results, out_fp, limit_float_decimals=limit_float) + tax_utils.write_summary( + query_gather_results, out_fp, limit_float_decimals=limit_float + ) # write summarized --> kreport output tsv if "kreport" in args.output_format: - kreport_outfile, limit_float = make_outfile(args.output_base, "kreport", output_dir=args.output_dir) + kreport_outfile, limit_float = make_outfile( + args.output_base, "kreport", output_dir=args.output_dir + ) with FileOutputCSV(kreport_outfile) as out_fp: header, kreport_results = single_query_results.make_kreport_results() - tax_utils.write_output(header, kreport_results, out_fp, sep="\t", write_header=False) + tax_utils.write_output( + header, kreport_results, out_fp, sep="\t", write_header=False + ) # write summarized --> LINgroup output tsv if "lingroup" in args.output_format: @@ -176,15 +221,23 @@ def metagenome(args): error(f"ERROR: {str(exc)}") sys.exit(-1) - lingroupfile, limit_float = make_outfile(args.output_base, "lingroup", output_dir=args.output_dir) + lingroupfile, limit_float = make_outfile( + args.output_base, "lingroup", output_dir=args.output_dir + ) with FileOutputCSV(lingroupfile) as out_fp: - header, lgreport_results = single_query_results.make_lingroup_results(LINgroupsD = lingroups) - tax_utils.write_output(header, lgreport_results, out_fp, sep="\t", write_header=True) + header, lgreport_results = single_query_results.make_lingroup_results( + LINgroupsD=lingroups + ) + tax_utils.write_output( + header, lgreport_results, out_fp, sep="\t", write_header=True + ) # write cami bioboxes format if "bioboxes" in args.output_format: - bbfile, limit_float = make_outfile(args.output_base, "bioboxes", output_dir=args.output_dir) + bbfile, limit_float = make_outfile( + args.output_base, "bioboxes", output_dir=args.output_dir + ) with FileOutputCSV(bbfile) as out_fp: header_lines, bb_results = single_query_results.make_cami_bioboxes() @@ -199,14 +252,17 @@ def genome(args): # first, load taxonomic_assignments try: - tax_assign = MultiLineageDB.load(args.taxonomy_csv, - keep_full_identifiers=args.keep_full_identifiers, - keep_identifier_versions=args.keep_identifier_versions, - force=args.force, lins=args.lins) + tax_assign = MultiLineageDB.load( + args.taxonomy_csv, + keep_full_identifiers=args.keep_full_identifiers, + keep_identifier_versions=args.keep_identifier_versions, + force=args.force, + lins=args.lins, + ) available_ranks = tax_assign.available_ranks - lg_ranks=None - all_lgs=None + lg_ranks = None + all_lgs = None if args.lingroup: lingroups = tax_utils.read_lingroups(args.lingroup) lg_ranks, all_lgs = tax_utils.parse_lingroups(lingroups) @@ -216,38 +272,51 @@ def genome(args): sys.exit(-1) if not tax_assign: - error(f'ERROR: No taxonomic assignments loaded from {",".join(args.taxonomy_csv)}. Exiting.') + error( + f'ERROR: No taxonomic assignments loaded from {",".join(args.taxonomy_csv)}. Exiting.' 
+ ) sys.exit(-1) if args.rank and args.rank not in available_ranks: - error(f"ERROR: No taxonomic information provided for rank {args.rank}: cannot classify at this rank") + error( + f"ERROR: No taxonomic information provided for rank {args.rank}: cannot classify at this rank" + ) sys.exit(-1) # get gather_csvs from args - gather_csvs = tax_utils.collect_gather_csvs(args.gather_csv, from_file=args.from_file) + gather_csvs = tax_utils.collect_gather_csvs( + args.gather_csv, from_file=args.from_file + ) try: - query_gather_results = tax_utils.check_and_load_gather_csvs(gather_csvs, tax_assign, force=args.force, - fail_on_missing_taxonomy=args.fail_on_missing_taxonomy, - keep_full_identifiers=args.keep_full_identifiers, - keep_identifier_versions = args.keep_identifier_versions, - lins=args.lins) + query_gather_results = tax_utils.check_and_load_gather_csvs( + gather_csvs, + tax_assign, + force=args.force, + fail_on_missing_taxonomy=args.fail_on_missing_taxonomy, + keep_full_identifiers=args.keep_full_identifiers, + keep_identifier_versions=args.keep_identifier_versions, + lins=args.lins, + ) except ValueError as exc: error(f"ERROR: {str(exc)}") sys.exit(-1) if not query_gather_results: - notify('No results for classification. Exiting.') + notify("No results for classification. Exiting.") sys.exit(-1) # for each queryResult, summarize at rank and classify according to thresholds, reporting any errors that occur. for queryResult in query_gather_results: try: - queryResult.build_classification_result(rank=args.rank, - ani_threshold=args.ani_threshold, - containment_threshold=args.containment_threshold, - lingroup_ranks=lg_ranks, lingroups=all_lgs) + queryResult.build_classification_result( + rank=args.rank, + ani_threshold=args.ani_threshold, + containment_threshold=args.containment_threshold, + lingroup_ranks=lg_ranks, + lingroups=all_lgs, + ) except ValueError as exc: error(f"ERROR: {str(exc)}") @@ -255,42 +324,65 @@ def genome(args): # write outputs if "csv_summary" in args.output_format: - summary_outfile, limit_float = make_outfile(args.output_base, "classification", output_dir=args.output_dir) + summary_outfile, limit_float = make_outfile( + args.output_base, "classification", output_dir=args.output_dir + ) with FileOutputCSV(summary_outfile) as out_fp: - tax_utils.write_summary(query_gather_results, out_fp, limit_float_decimals=limit_float, classification=True) + tax_utils.write_summary( + query_gather_results, + out_fp, + limit_float_decimals=limit_float, + classification=True, + ) # write summarized output in human-readable format if "human" in args.output_format: - summary_outfile, limit_float = make_outfile(args.output_base, "human", output_dir=args.output_dir) + summary_outfile, limit_float = make_outfile( + args.output_base, "human", output_dir=args.output_dir + ) with FileOutput(summary_outfile) as out_fp: - tax_utils.write_human_summary(query_gather_results, out_fp, args.rank or "species", classification=True) + tax_utils.write_human_summary( + query_gather_results, + out_fp, + args.rank or "species", + classification=True, + ) # The following require a single rank: # note: interactive krona can handle mult ranks, do we want to enable? 
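
make_outfile (defined near the top of this file) drives all of the output paths below; as a worked example of its two behaviors, with illustrative arguments:

    # '-' keeps output on stdout and enables float limiting; otherwise the
    # extension registered for the output type is appended (plus output_dir).
    make_outfile("out", "krona", output_dir="results")
    # -> ("results/out.krona.tsv", False)
    make_outfile("-", "csv_summary")
    # -> ("-", True)
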
if "krona" in args.output_format: - krona_results, header = tax_utils.format_for_krona(query_gather_results=query_gather_results, rank=args.rank, classification=True) - krona_outfile, limit_float = make_outfile(args.output_base, "krona", output_dir=args.output_dir) + krona_results, header = tax_utils.format_for_krona( + query_gather_results=query_gather_results, + rank=args.rank, + classification=True, + ) + krona_outfile, limit_float = make_outfile( + args.output_base, "krona", output_dir=args.output_dir + ) with FileOutputCSV(krona_outfile) as out_fp: tax_utils.write_krona(header, krona_results, out_fp) if "lineage_csv" in args.output_format: - lineage_outfile, _ = make_outfile(args.output_base, "lineage_csv", - output_dir=args.output_dir) + lineage_outfile, _ = make_outfile( + args.output_base, "lineage_csv", output_dir=args.output_dir + ) lineage_results = [] header = None for q_res in query_gather_results: if not header: ranks = list(q_res.ranks) - if 'strain' in ranks: # maintains prior functionality.. but we could keep strain now, i think? - ranks.remove('strain') + if ( + "strain" in ranks + ): # maintains prior functionality.. but we could keep strain now, i think? + ranks.remove("strain") header = ["ident", *ranks] - lineageD = q_res.classification_result.as_lineage_dict(q_res.query_info, ranks) + lineageD = q_res.classification_result.as_lineage_dict( + q_res.query_info, ranks + ) lineage_results.append(lineageD) with FileOutputCSV(lineage_outfile) as out_fp: tax_utils.write_output(header, lineage_results, out_fp) - - def annotate(args): @@ -304,21 +396,28 @@ def annotate(args): try: # first, load taxonomic_assignments - tax_assign = MultiLineageDB.load(args.taxonomy_csv, - keep_full_identifiers=args.keep_full_identifiers, - keep_identifier_versions=args.keep_identifier_versions, - force=args.force, lins=args.lins) + tax_assign = MultiLineageDB.load( + args.taxonomy_csv, + keep_full_identifiers=args.keep_full_identifiers, + keep_identifier_versions=args.keep_identifier_versions, + force=args.force, + lins=args.lins, + ) except ValueError as exc: error(f"ERROR: {str(exc)}") sys.exit(-1) if not tax_assign: - error(f'ERROR: No taxonomic assignments loaded from {",".join(args.taxonomy_csv)}. Exiting.') + error( + f'ERROR: No taxonomic assignments loaded from {",".join(args.taxonomy_csv)}. Exiting.' + ) sys.exit(-1) # get csv from args - input_csvs = tax_utils.collect_gather_csvs(args.gather_csv, from_file=args.from_file) + input_csvs = tax_utils.collect_gather_csvs( + args.gather_csv, from_file=args.from_file + ) # handle each gather csv separately for n, in_csv in enumerate(input_csvs): @@ -332,22 +431,28 @@ def annotate(args): # look for the column to match with taxonomic identifier id_col = None - col_options = ['name', 'match_name', 'ident', 'accession'] + col_options = ["name", "match_name", "ident", "accession"] for colname in col_options: if colname in header: id_col = colname break if not id_col: - raise ValueError(f"Cannot find taxonomic identifier column in '{in_csv}'. Tried: {', '.join(col_options)}") + raise ValueError( + f"Cannot find taxonomic identifier column in '{in_csv}'. Tried: {', '.join(col_options)}" + ) - notify(f"Starting annotation on '{in_csv}'. Using ID column: '{id_col}'") + notify( + f"Starting annotation on '{in_csv}'. 
Using ID column: '{id_col}'" + ) # make output file for this input - out_base = os.path.basename(in_csv.rsplit('.csv')[0]) - this_outfile, _ = make_outfile(out_base, "annotate", output_dir=args.output_dir) + out_base = os.path.basename(in_csv.rsplit(".csv")[0]) + this_outfile, _ = make_outfile( + out_base, "annotate", output_dir=args.output_dir + ) - out_header = header + ['lineage'] + out_header = header + ["lineage"] with FileOutputCSV(this_outfile) as out_fp: w = csv.DictWriter(out_fp, out_header) @@ -357,25 +462,36 @@ def annotate(args): n_missed = 0 for n, row in enumerate(r): # find lineage and write annotated row - taxres = AnnotateTaxResult(raw=row, id_col=id_col, lins=args.lins, - keep_full_identifiers=args.keep_full_identifiers, - keep_identifier_versions=args.keep_identifier_versions) - taxres.get_match_lineage(tax_assignments=tax_assign, fail_on_missing_taxonomy=args.fail_on_missing_taxonomy) - - if taxres.missed_ident: # could not assign taxonomy - n_missed+=1 + taxres = AnnotateTaxResult( + raw=row, + id_col=id_col, + lins=args.lins, + keep_full_identifiers=args.keep_full_identifiers, + keep_identifier_versions=args.keep_identifier_versions, + ) + taxres.get_match_lineage( + tax_assignments=tax_assign, + fail_on_missing_taxonomy=args.fail_on_missing_taxonomy, + ) + + if taxres.missed_ident: # could not assign taxonomy + n_missed += 1 w.writerow(taxres.row_with_lineages()) - rows_annotated = (n+1) - n_missed + rows_annotated = (n + 1) - n_missed if not rows_annotated: - raise ValueError(f"Could not annotate any rows from '{in_csv}'.") + raise ValueError( + f"Could not annotate any rows from '{in_csv}'." + ) else: - notify(f"Annotated {rows_annotated} of {n+1} total rows from '{in_csv}'.") + notify( + f"Annotated {rows_annotated} of {n+1} total rows from '{in_csv}'." + ) except ValueError as exc: if args.force: notify(str(exc)) - notify('--force is set. Attempting to continue to next file.') + notify("--force is set. Attempting to continue to next file.") else: error(f"ERROR: {str(exc)}") sys.exit(-1) @@ -385,10 +501,12 @@ def prepare(args): "Combine multiple taxonomy databases into one and/or translate formats." notify("loading taxonomies...") try: - tax_assign = MultiLineageDB.load(args.taxonomy_csv, - force=args.force, - keep_full_identifiers=args.keep_full_identifiers, - keep_identifier_versions=args.keep_identifier_versions) + tax_assign = MultiLineageDB.load( + args.taxonomy_csv, + force=args.force, + keep_full_identifiers=args.keep_full_identifiers, + keep_identifier_versions=args.keep_identifier_versions, + ) except ValueError as exc: error("ERROR while loading taxonomies!") error(str(exc)) @@ -409,14 +527,15 @@ def prepare(args): def grep(args): term = args.pattern - tax_assign = MultiLineageDB.load(args.taxonomy_csv, - force=args.force) + tax_assign = MultiLineageDB.load(args.taxonomy_csv, force=args.force) silent = args.silent or args.count notify(f"searching {len(args.taxonomy_csv)} taxonomy files for '{term}'") if args.invert_match: - notify("-v/--invert-match specified; returning only lineages that do not match.") + notify( + "-v/--invert-match specified; returning only lineages that do not match." 
+ ) if args.rank: notify(f"limiting matches to {args.rank} level") @@ -436,6 +555,7 @@ def find_pattern(lineage, select_rank): return False if args.invert_match: + def search_pattern(l, r): return not find_pattern(l, r) else: @@ -452,22 +572,26 @@ def search_pattern(l, r): else: with FileOutputCSV(args.output) as fp: w = csv.writer(fp) - w.writerow(['ident'] + list(RankLineageInfo().taxlist[:-1])) + w.writerow(["ident"] + list(RankLineageInfo().taxlist[:-1])) for ident, lineage in sorted(match_ident): - w.writerow([ident] + [ x.name for x in lineage ]) + w.writerow([ident] + [x.name for x in lineage]) - notify(f"found {len(match_ident)} matches; saved identifiers to picklist file '{args.output}'") + notify( + f"found {len(match_ident)} matches; saved identifiers to picklist file '{args.output}'" + ) def summarize(args): "Summarize multiple taxonomy databases." notify("loading taxonomies...") try: - tax_assign = MultiLineageDB.load(args.taxonomy_files, - force=args.force, - keep_full_identifiers=args.keep_full_identifiers, - keep_identifier_versions=args.keep_identifier_versions, - lins=args.lins) + tax_assign = MultiLineageDB.load( + args.taxonomy_files, + force=args.force, + keep_full_identifiers=args.keep_full_identifiers, + keep_identifier_versions=args.keep_identifier_versions, + lins=args.lins, + ) except ValueError as exc: error("ERROR while loading taxonomies!") error(str(exc)) @@ -481,7 +605,6 @@ def summarize(args): rank_counts = defaultdict(int) name_seen = set() for v in tax_assign.values(): - sofar = [] for vv in v: name = vv.name rank = vv.rank @@ -507,7 +630,7 @@ def summarize(args): with FileOutputCSV(args.output_lineage_information) as fp: w = csv.writer(fp) - w.writerow(['rank', 'lineage_count', 'lineage']) + w.writerow(["rank", "lineage_count", "lineage"]) # output in order of most common for lineage, count in lineage_counts.most_common(): @@ -526,9 +649,9 @@ def summarize(args): def main(arglist=None): args = sourmash.cli.get_parser().parse_args(arglist) submod = getattr(sourmash.cli.sig, args.subcmd) - mainmethod = getattr(submod, 'main') + mainmethod = getattr(submod, "main") return mainmethod(args) -if __name__ == '__main__': +if __name__ == "__main__": main(sys.argv) diff --git a/src/sourmash/tax/tax_utils.py b/src/sourmash/tax/tax_utils.py index df69f0ee6a..55b30a540e 100644 --- a/src/sourmash/tax/tax_utils.py +++ b/src/sourmash/tax/tax_utils.py @@ -16,24 +16,45 @@ import sqlite3 -__all__ = ['get_ident', 'ascending_taxlist', 'collect_gather_csvs', - 'load_gather_results', 'check_and_load_gather_csvs' - 'report_missing_and_skipped_identities', 'aggregate_by_lineage_at_rank' - 'format_for_krona', 'write_output', 'write_bioboxes', 'parse_lingroups', - 'combine_sumgather_csvs_by_lineage', 'write_lineage_sample_frac', - 'MultiLineageDB', 'RankLineageInfo', 'LINLineageInfo'] +__all__ = [ + "get_ident", + "ascending_taxlist", + "collect_gather_csvs", + "load_gather_results", + "check_and_load_gather_csvs", "report_missing_and_skipped_identities", + "aggregate_by_lineage_at_rank", "format_for_krona", + "write_output", + "write_bioboxes", + "parse_lingroups", + "combine_sumgather_csvs_by_lineage", + "write_lineage_sample_frac", + "MultiLineageDB", + "RankLineageInfo", + "LINLineageInfo", +] from sourmash.logging import notify from sourmash.sourmash_args import load_pathlist_from_file -RANKCODE = { "superkingdom": "D", "kingdom": "K", "phylum": "P", "class": "C", - "order": "O", "family":"F", "genus": "G", "species": "S", "unclassified": "U"} +RANKCODE = { + "superkingdom":
"D", + "kingdom": "K", + "phylum": "P", + "class": "C", + "order": "O", + "family": "F", + "genus": "G", + "species": "S", + "unclassified": "U", +} + class LineagePair(NamedTuple): rank: str name: str = None taxid: int = None + @dataclass(frozen=True, order=True) class BaseLineageInfo: """ @@ -53,10 +74,13 @@ class BaseLineageInfo: Input lineage information is only used for initialization of the final `lineage` and will not be used or compared in any other class methods. """ + # need to set compare=False for any mutable type to keep this class hashable - ranks: tuple() # require ranks - lineage: tuple = None # tuple of LineagePairs - lineage_str: str = field(default=None, compare=False) # ';'- or ','-separated str of lineage names + ranks: () # require ranks + lineage: tuple = None # tuple of LineagePairs + lineage_str: str = field( + default=None, compare=False + ) # ';'- or ','-separated str of lineage names def __post_init__(self): "Initialize according to passed values" @@ -71,9 +95,11 @@ def __post_init__(self): self._init_empty() def __eq__(self, other): - if other == (): # just handy: if comparing to a null tuple, don't try to find its lineage before returning False + if ( + other == () + ): # just handy: if comparing to a null tuple, don't try to find its lineage before returning False return False - return all([self.ranks == other.ranks and self.lineage==other.lineage]) + return all([self.ranks == other.ranks and self.lineage == other.lineage]) @property def taxlist(self): @@ -108,7 +134,7 @@ def filled_lineage(self): if not self.filled_ranks: return () lowest_filled_rank_idx = self.rank_index(self.filled_ranks[-1]) - return self.lineage[:lowest_filled_rank_idx+1] + return self.lineage[: lowest_filled_rank_idx + 1] @property def lowest_lineage_name(self): @@ -125,7 +151,7 @@ def lowest_lineage_taxid(self): return self.filled_lineage[-1].taxid def _init_empty(self): - 'initialize empty genome lineage' + "initialize empty genome lineage" new_lineage = [] for rank in self.ranks: new_lineage.append(LineagePair(rank=rank)) @@ -134,7 +160,7 @@ def _init_empty(self): object.__setattr__(self, "filled_ranks", ()) def _init_from_lineage_tuples(self): - 'initialize from tuple/list of LineagePairs, allowing empty ranks and reordering if necessary' + "initialize from tuple/list of LineagePairs, allowing empty ranks and reordering if necessary" new_lineage = [] # check this is a list or tuple of lineage tuples: for rank in self.ranks: @@ -143,12 +169,14 @@ def _init_from_lineage_tuples(self): # now add input tuples in correct spots. This corrects for order and allows empty values. if not isinstance(lin_tup, LineagePair): raise ValueError(f"{lin_tup} is not tax_utils LineagePair.") - if lin_tup.rank: # skip this tuple if rank is None or "" (empty lineage tuple. is this needed?) + if lin_tup.rank: # skip this tuple if rank is None or "" (empty lineage tuple. is this needed?) try: # find index for this rank rank_idx = self.rank_index(lin_tup.rank) except ValueError as e: - raise ValueError(f"Rank '{lin_tup.rank}' not present in {', '.join(self.ranks)}") from e + raise ValueError( + f"Rank '{lin_tup.rank}' not present in {', '.join(self.ranks)}" + ) from e new_lineage[rank_idx] = lin_tup # build list of filled ranks @@ -161,10 +189,13 @@ def _init_from_lineage_str(self): """ Turn a ; or ,-separated set of lineages into a list of LineagePair objs. 
""" - new_lineage = self.lineage_str.split(';') + new_lineage = self.lineage_str.split(";") if len(new_lineage) == 1: - new_lineage = self.lineage_str.split(',') - new_lineage = [ LineagePair(rank=rank, name=n) for (rank, n) in zip_longest(self.ranks, new_lineage) ] + new_lineage = self.lineage_str.split(",") + new_lineage = [ + LineagePair(rank=rank, name=n) + for (rank, n) in zip_longest(self.ranks, new_lineage) + ] # build list of filled ranks filled_ranks = [a.rank for a in new_lineage if a.name is not None] object.__setattr__(self, "lineage", tuple(new_lineage)) @@ -180,7 +211,7 @@ def zip_lineage(self, truncate_empty=False): zipped = [a.name for a in self.lineage] # replace None with empty string ("") if None in zipped: - zipped = ['' if x is None else x for x in zipped] + zipped = ["" if x is None else x for x in zipped] return zipped @@ -193,11 +224,11 @@ def zip_taxid(self, truncate_empty=False): else: zipped = [a.taxid for a in self.lineage] # replace None with empty string (""); cast taxids to str - zipped = ['' if x is None else str(x) for x in zipped] + zipped = ["" if x is None else str(x) for x in zipped] return zipped - def display_lineage(self, truncate_empty=True, null_as_unclassified=False, sep = ';'): + def display_lineage(self, truncate_empty=True, null_as_unclassified=False, sep=";"): "Return lineage names as ';'-separated list" lin = sep.join(self.zip_lineage(truncate_empty=truncate_empty)) if null_as_unclassified and lin == "" or lin is None: @@ -205,12 +236,12 @@ def display_lineage(self, truncate_empty=True, null_as_unclassified=False, sep = else: return lin - def display_taxid(self, truncate_empty=True, sep = ";"): + def display_taxid(self, truncate_empty=True, sep=";"): "Return lineage taxids as ';'-separated list" return sep.join(self.zip_taxid(truncate_empty=truncate_empty)) def check_rank_availability(self, rank): - if rank in self.ranks: # rank is available + if rank in self.ranks: # rank is available return True raise ValueError(f"Desired Rank '{rank}' not available for this lineage.") @@ -234,12 +265,14 @@ def is_lineage_match(self, other, rank): """ self.check_rank_availability(rank) if not self.is_compatible(other): - raise ValueError("Cannot compare lineages from taxonomies with different ranks.") + raise ValueError( + "Cannot compare lineages from taxonomies with different ranks." + ) # always return false if rank is not filled in either of the two lineages if self.rank_is_filled(rank, other=other): rank_idx = self.rank_index(rank) - a_lin = self.lineage[:rank_idx+1] - b_lin = other.lineage[:rank_idx+1] + a_lin = self.lineage[: rank_idx + 1] + b_lin = other.lineage[: rank_idx + 1] if a_lin == b_lin: return 1 return 0 @@ -252,7 +285,7 @@ def pop_to_rank(self, rank): return replace(self) # if not, make filled_lineage at this rank + use to generate new LineageInfo new_lineage = self.lineage_at_rank(rank) - new = replace(self, lineage = new_lineage) + new = replace(self, lineage=new_lineage) # replace doesn't run the __post_init__ properly. reinitialize. 
new._init_from_lineage_tuples() return new @@ -265,7 +298,7 @@ def lineage_at_rank(self, rank): return self.filled_lineage # if not, return lineage tuples down to desired rank rank_idx = self.rank_index(rank) - return self.filled_lineage[:rank_idx+1] + return self.filled_lineage[: rank_idx + 1] def find_lca(self, other): """ @@ -298,8 +331,18 @@ class RankLineageInfo(BaseLineageInfo): Input lineage information is only used for initialization of the final `lineage` and will not be used or compared in any other class methods. """ - ranks: tuple = ('superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'strain') - lineage_dict: dict = field(default=None, compare=False) # dict of rank: name + + ranks: tuple = ( + "superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + "strain", + ) + lineage_dict: dict = field(default=None, compare=False) # dict of rank: name def __post_init__(self): "Initialize according to passed values" @@ -321,21 +364,23 @@ def _init_from_lineage_dict(self): Use NCBI taxids if available as '|'-separated 'taxpath' column. Allows empty ranks/extra columns and reordering if necessary """ - null_names = set(['[Blank]', 'na', 'null', 'NA', '']) + null_names = set(["[Blank]", "na", "null", "NA", ""]) if not isinstance(self.lineage_dict, (dict)): raise ValueError(f"{self.lineage_dict} is not dictionary") new_lineage = [] - taxpath=[] + taxpath = [] # build empty lineage and taxpath for rank in self.ranks: new_lineage.append(LineagePair(rank=rank)) # check for NCBI taxpath information - taxpath_str = self.lineage_dict.get('taxpath', []) + taxpath_str = self.lineage_dict.get("taxpath", []) if taxpath_str: - taxpath = taxpath_str.split('|') + taxpath = taxpath_str.split("|") if len(taxpath) > len(self.ranks): - raise ValueError(f"Number of NCBI taxids ({len(taxpath)}) exceeds number of ranks ({len(self.ranks)})") + raise ValueError( + f"Number of NCBI taxids ({len(taxpath)}) exceeds number of ranks ({len(self.ranks)})" + ) # now add rank information in correct spots. This corrects for order and allows empty ranks and extra dict keys for key, val in self.lineage_dict.items(): @@ -344,7 +389,7 @@ def _init_from_lineage_dict(self): rank, name = key, val rank_idx = self.rank_index(rank) except ValueError: - continue # ignore dictionary entries (columns) that don't match a rank + continue # ignore dictionary entries (columns) that don't match a rank if taxpath: try: @@ -353,8 +398,8 @@ def _init_from_lineage_dict(self): taxid = None # filter null if name is not None and name.strip() in null_names: - name = None - new_lineage[rank_idx] = LineagePair(rank=rank, name=name, taxid=taxid) + name = None + new_lineage[rank_idx] = LineagePair(rank=rank, name=name, taxid=taxid) # build list of filled ranks filled_ranks = [a.rank for a in new_lineage if a.name] @@ -382,7 +427,10 @@ class LINLineageInfo(BaseLineageInfo): Input lineage information is only used for initialization of the final `lineage` and will not be used or compared in any other class methods. 
""" - ranks: tuple = field(default=None, init=False, compare=False)# we will set this within class instead + + ranks: tuple = field( + default=None, init=False, compare=False + ) # we will set this within class instead lineage: tuple = None # init with n_positions if you want to set a specific number of positions n_lin_positions: int = field(default=None, compare=False) @@ -403,9 +451,11 @@ def __eq__(self, other): total ranks, with full LINs, we only check for the filled_lineage to match and don't check that the number of lin_positions match. """ - if other == (): # if comparing to a null tuple, don't try to find its lineage before returning False + if ( + other == () + ): # if comparing to a null tuple, don't try to find its lineage before returning False return False - return self.filled_lineage==other.filled_lineage + return self.filled_lineage == other.filled_lineage def _init_ranks_from_n_lin_positions(self): new_ranks = [str(x) for x in range(0, self.n_lin_positions)] @@ -418,7 +468,7 @@ def _init_empty(self): # set n_lin_positions to 0 for completely empty LINLineageInfo object.__setattr__(self, "n_lin_positions", 0) self._init_ranks_from_n_lin_positions() - new_lineage=[] + new_lineage = [] for rank in self.ranks: new_lineage.append(LineagePair(rank=rank)) # set lineage and filled_ranks (because frozen, need to do it this way) @@ -430,12 +480,16 @@ def _init_from_lineage_str(self): """ Turn a ; or ,-separated set of lineages into a list of LineagePair objs. """ - new_lineage = self.lineage_str.split(';') + new_lineage = self.lineage_str.split(";") if len(new_lineage) == 1: - new_lineage = self.lineage_str.split(',') + new_lineage = self.lineage_str.split(",") if self.n_lin_positions is not None: if self.n_lin_positions < len(new_lineage): - raise(ValueError("Provided 'n_lin_positions' has fewer positions than provided 'lineage_str'.")) + raise ( + ValueError( + "Provided 'n_lin_positions' has fewer positions than provided 'lineage_str'." + ) + ) self._init_ranks_from_n_lin_positions() else: n_lin_positions = len(new_lineage) @@ -443,14 +497,17 @@ def _init_from_lineage_str(self): self._init_ranks_from_n_lin_positions() # build lineage and n_filled_pos, filled_ranks - new_lineage = [ LineagePair(rank=rank, name=n) for (rank, n) in zip_longest(self.ranks, new_lineage) ] + new_lineage = [ + LineagePair(rank=rank, name=n) + for (rank, n) in zip_longest(self.ranks, new_lineage) + ] filled_ranks = [a.rank for a in new_lineage if a.name is not None] object.__setattr__(self, "lineage", tuple(new_lineage)) object.__setattr__(self, "filled_ranks", tuple(filled_ranks)) object.__setattr__(self, "n_filled_pos", len(filled_ranks)) - def _init_from_lineage_tuples(self): - 'initialize from tuple/list of LineagePairs, building ranks as you go' + def _init_from_lineage_tuples(self): + "initialize from tuple/list of LineagePairs, building ranks as you go" new_lineage = [] ranks = [] # check this is a list or tuple of lineage tuples: @@ -469,7 +526,6 @@ def _init_from_lineage_tuples(self): object.__setattr__(self, "filled_ranks", tuple(filled_ranks)) object.__setattr__(self, "n_filled_pos", len(filled_ranks)) - def is_compatible(self, other): """ Since we sometimes want to match LINprefixes with full LINs, @@ -486,7 +542,6 @@ def is_compatible(self, other): return False - @dataclass class LineageTree: """ @@ -494,6 +549,7 @@ class LineageTree: LineageInfo objects in 'assignments'. This tree can then be used to find lowest common ancestor agreements/confusion. 
""" + assignments: list = field(compare=False) def __post_init__(self): @@ -501,7 +557,7 @@ def __post_init__(self): self.add_lineages(self.assignments) def add_lineage(self, lineage): - if isinstance(lineage, (BaseLineageInfo, RankLineageInfo, LINLineageInfo)): + if isinstance(lineage, BaseLineageInfo | RankLineageInfo | LINLineageInfo): lineage = lineage.filled_lineage node = self.tree for lineage_tup in lineage: @@ -515,7 +571,9 @@ def add_lineages(self, lineages): if not lineages: raise ValueError("empty assignment passed to build_tree") if not isinstance(lineages, abc.Iterable): - raise ValueError("Must pass in an iterable containing LineagePair or LineageInfo objects.") + raise ValueError( + "Must pass in an iterable containing LineagePair or LineageInfo objects." + ) for lineageInf in lineages: self.add_lineage(lineageInf) @@ -529,13 +587,13 @@ def find_lca(self): node = self.tree lca = [] while 1: - if len(node) == 1: # descend to only child; track path + if len(node) == 1: # descend to only child; track path lineage_tup = next(iter(node.keys())) lca.append(lineage_tup) node = node[lineage_tup] - elif len(node) == 0: # at leaf; end + elif len(node) == 0: # at leaf; end return tuple(lca), 0 - else: # len(node) > 1 => confusion!! + else: # len(node) > 1 => confusion!! return tuple(lca), len(node) def ordered_paths(self, include_internal=False): @@ -550,7 +608,7 @@ def ordered_paths(self, include_internal=False): while stack: path, node = stack.pop() for key, val in node.items(): - if len(val) == 0: # leaf node + if len(val) == 0: # leaf node # if want internal paths, build up from leaf if include_internal: internal_path = path @@ -561,20 +619,19 @@ def ordered_paths(self, include_internal=False): internal_path = internal_path[:-1] # now add leaf path paths.append(path + (key,)) - else: # not leaf, add to stack + else: # not leaf, add to stack stack.append((path + (key,), val)) return paths -def get_ident(ident, *, - keep_full_identifiers=False, keep_identifier_versions=False): +def get_ident(ident, *, keep_full_identifiers=False, keep_identifier_versions=False): # split identifiers = split on whitespace # keep identifiers = don't split .[12] from assembly accessions "Hack and slash identifiers." 
if not keep_full_identifiers: - ident = ident.split(' ')[0] + ident = ident.split(" ")[0] if not keep_identifier_versions: - ident = ident.split('.')[0] + ident = ident.split(".")[0] return ident @@ -582,12 +639,18 @@ def ascending_taxlist(include_strain=True): """ Provide an ordered list of taxonomic ranks: strain --> superkingdom """ - ascending_taxlist = ['species', 'genus', 'family', 'order', - 'class', 'phylum', 'superkingdom'] + ascending_taxlist = [ + "species", + "genus", + "family", + "order", + "class", + "phylum", + "superkingdom", + ] if include_strain: - ascending_taxlist = ['strain'] + ascending_taxlist - for k in ascending_taxlist: - yield k + ascending_taxlist = ["strain"] + ascending_taxlist + yield from ascending_taxlist def collect_gather_csvs(cmdline_gather_input, *, from_file=None): @@ -600,7 +663,7 @@ def collect_gather_csvs(cmdline_gather_input, *, from_file=None): if gf not in gather_csvs: gather_csvs.append(gf) else: - notify(f'ignoring duplicated reference to file: {gf}') + notify(f"ignoring duplicated reference to file: {gf}") # ignore pathlist duplicates if from_file: more_files = load_pathlist_from_file(from_file) @@ -608,25 +671,29 @@ def collect_gather_csvs(cmdline_gather_input, *, from_file=None): if gf not in gather_csvs: gather_csvs.append(gf) else: - notify(f'ignoring duplicated reference to file: {gf}') + notify(f"ignoring duplicated reference to file: {gf}") return gather_csvs def read_lingroups(lingroup_csv): lingroupD = {} - n=None + n = None with sourmash_args.FileInputCSV(lingroup_csv) as r: header = r.fieldnames # check for empty file if not header: - raise ValueError(f"Cannot read lingroups from '{lingroup_csv}'. Is file empty?") + raise ValueError( + f"Cannot read lingroups from '{lingroup_csv}'. Is file empty?" + ) if "lin" not in header or "name" not in header: - raise ValueError(f"'{lingroup_csv}' must contain the following columns: 'name', 'lin'.") + raise ValueError( + f"'{lingroup_csv}' must contain the following columns: 'name', 'lin'." + ) for n, row in enumerate(r): - lingroupD[row['lin']] = row['name'] + lingroupD[row["lin"]] = row["name"] if n is None: - raise ValueError(f'No lingroups loaded from {lingroup_csv}.') + raise ValueError(f"No lingroups loaded from {lingroup_csv}.") n_lg = len(lingroupD.keys()) notify(f"Read {n+1} lingroup rows and found {n_lg} distinct lingroup prefixes.") return lingroupD @@ -646,20 +713,30 @@ def parse_lingroups(lingroupD): return lg_ranks, all_lgs -def load_gather_results(gather_csv, tax_assignments, *, seen_queries=None, force=False, - skip_idents = None, fail_on_missing_taxonomy=False, - keep_full_identifiers=False, keep_identifier_versions=False, - lins=False): +def load_gather_results( + gather_csv, + tax_assignments, + *, + seen_queries=None, + force=False, + skip_idents=None, + fail_on_missing_taxonomy=False, + keep_full_identifiers=False, + keep_identifier_versions=False, + lins=False, +): "Load a single gather csv" if not seen_queries: - seen_queries=set() + seen_queries = set() header = [] gather_results = {} with sourmash_args.FileInputCSV(gather_csv) as r: header = r.fieldnames # check for empty file if not header: - raise ValueError(f"Cannot read gather results from '{gather_csv}'. Is file empty?") + raise ValueError( + f"Cannot read gather results from '{gather_csv}'. Is file empty?" 
+ ) this_querytaxres = None for n, row in enumerate(r): @@ -667,72 +744,101 @@ def load_gather_results(gather_csv, tax_assignments, *, seen_queries=None, force try: gatherRow = GatherRow(**row) except TypeError as exc: - raise ValueError(f"'{gather_csv}' is missing columns needed for taxonomic summarization. Please run gather with sourmash >= 4.4.") from exc + raise ValueError( + f"'{gather_csv}' is missing columns needed for taxonomic summarization. Please run gather with sourmash >= 4.4." + ) from exc # check if we've seen this query already in a different gather CSV if gatherRow.query_name in seen_queries: # do not allow loading of same query from a second CSV. - raise ValueError(f"Gather query {gatherRow.query_name} was found in more than one CSV. Cannot load from '{gather_csv}'.") - taxres = TaxResult(raw=gatherRow, keep_full_identifiers=keep_full_identifiers, - keep_identifier_versions=keep_identifier_versions, - lins=lins) - taxres.get_match_lineage(tax_assignments=tax_assignments, skip_idents=skip_idents, - fail_on_missing_taxonomy=fail_on_missing_taxonomy) + raise ValueError( + f"Gather query {gatherRow.query_name} was found in more than one CSV. Cannot load from '{gather_csv}'." + ) + taxres = TaxResult( + raw=gatherRow, + keep_full_identifiers=keep_full_identifiers, + keep_identifier_versions=keep_identifier_versions, + lins=lins, + ) + taxres.get_match_lineage( + tax_assignments=tax_assignments, + skip_idents=skip_idents, + fail_on_missing_taxonomy=fail_on_missing_taxonomy, + ) # add to matching QueryTaxResult or create new one if not this_querytaxres or not this_querytaxres.is_compatible(taxres): # get existing or initialize new - this_querytaxres = gather_results.get(gatherRow.query_name, QueryTaxResult(taxres.query_info, lins=lins)) + this_querytaxres = gather_results.get( + gatherRow.query_name, QueryTaxResult(taxres.query_info, lins=lins) + ) this_querytaxres.add_taxresult(taxres) gather_results[gatherRow.query_name] = this_querytaxres if not gather_results: - raise ValueError(f'No gather results loaded from {gather_csv}.') + raise ValueError(f"No gather results loaded from {gather_csv}.") else: notify(f"loaded {len(gather_results)} gather results from '{gather_csv}'.") - return gather_results, header #, gather_queries # can use the gather_results keys instead - - -def check_and_load_gather_csvs(gather_csvs, tax_assign, *, fail_on_missing_taxonomy=False, force=False, - keep_full_identifiers=False,keep_identifier_versions=False, lins=False): - ''' + return ( + gather_results, + header, + ) # , gather_queries # can use the gather_results keys instead + + +def check_and_load_gather_csvs( + gather_csvs, + tax_assign, + *, + fail_on_missing_taxonomy=False, + force=False, + keep_full_identifiers=False, + keep_identifier_versions=False, + lins=False, +): + """ Load gather csvs, checking for empties and ids missing from taxonomic assignments. 
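+
+    Example (an illustrative sketch; filenames are assumptions):
+
+        tax_assign = LineageDB.load("taxonomy.csv")
+        query_results = check_and_load_gather_csvs(
+            ["gather1.csv", "gather2.csv"], tax_assign, force=True
+        )
+        # with force=True, unreadable files are skipped where possible
+        # instead of failing outright (duplicated queries still raise)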
- ''' + """ if not isinstance(gather_csvs, list): gather_csvs = [gather_csvs] gather_results = {} - total_missed = 0 - all_ident_missed = set() header = [] n_ignored = 0 for n, gather_csv in enumerate(gather_csvs): these_results = {} try: - these_results, header = load_gather_results(gather_csv, tax_assign, - seen_queries=gather_results.keys(), - force=force, keep_full_identifiers=keep_full_identifiers, - keep_identifier_versions = keep_identifier_versions, - fail_on_missing_taxonomy=fail_on_missing_taxonomy, - lins=lins) + these_results, header = load_gather_results( + gather_csv, + tax_assign, + seen_queries=gather_results.keys(), + force=force, + keep_full_identifiers=keep_full_identifiers, + keep_identifier_versions=keep_identifier_versions, + fail_on_missing_taxonomy=fail_on_missing_taxonomy, + lins=lins, + ) except ValueError as exc: if force: if "found in more than one CSV" in str(exc): - notify('Cannot force past duplicated gather query. Exiting.') + notify("Cannot force past duplicated gather query. Exiting.") raise if "Failing, as requested via --fail-on-missing-taxonomy" in str(exc): raise notify(str(exc)) - notify('--force is set. Attempting to continue to next set of gather results.') - n_ignored+=1 + notify( + "--force is set. Attempting to continue to next set of gather results." + ) + n_ignored += 1 continue else: - notify('Exiting.') + notify("Exiting.") raise # add these results to gather_results gather_results.update(these_results) - + # some reporting - num_gather_csvs_loaded = n+1 - n_ignored - notify(f'loaded results for {len(gather_results)} queries from {str(num_gather_csvs_loaded)} gather CSVs') + num_gather_csvs_loaded = n + 1 - n_ignored + notify( + f"loaded results for {len(gather_results)} queries from {str(num_gather_csvs_loaded)} gather CSVs" + ) # count and report missing and skipped idents report_missing_and_skipped_identities(gather_results) @@ -748,8 +854,8 @@ def report_missing_and_skipped_identities(gather_results): that are not present in taxonomic assignments, either by accident (missed) or request (skipped). """ - ident_missed= set() - ident_skipped= set() + ident_missed = set() + ident_skipped = set() total_n_missed = 0 total_n_skipped = 0 total_taxresults = 0 @@ -757,20 +863,24 @@ def report_missing_and_skipped_identities(gather_results): ident_missed.update(querytaxres.missed_idents) ident_skipped.update(querytaxres.skipped_idents) # totals are total rows in gather that were missed - do we want to report these at all? - total_n_missed+= querytaxres.n_missed - total_n_skipped+= querytaxres.n_skipped + total_n_missed += querytaxres.n_missed + total_n_skipped += querytaxres.n_skipped total_taxresults += len(querytaxres.raw_taxresults) if ident_missed: - notify(f'of {total_taxresults} gather results, lineage assignments for {total_n_missed} results were missed.') - notify(f'The following are missing from the taxonomy information: {", ".join(ident_missed)}') + notify( + f"of {total_taxresults} gather results, lineage assignments for {total_n_missed} results were missed." + ) + notify( + f'The following are missing from the taxonomy information: {", ".join(ident_missed)}' + ) def aggregate_by_lineage_at_rank(query_gather_results, rank, *, by_query=False): - ''' - Aggregate list of summarized_lineage_results at rank, keeping + """ + Aggregate list of summarized_lineage_results at rank, keeping query names or not (but this aggregates across queries if multiple). 
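+
+    Example (an illustrative sketch; values are made up):
+
+        lineage_summary, all_queries = aggregate_by_lineage_at_rank(
+            query_gather_results, "phylum", by_query=False
+        )
+        # lineage_summary, e.g. {"d__Bacteria;p__Bacteroidota": 0.28, ...},
+        # with fractions averaged across the queries in all_queries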
- ''' + """ lineage_summary = defaultdict(float) if by_query: lineage_summary = defaultdict(dict) @@ -784,9 +894,11 @@ def aggregate_by_lineage_at_rank(query_gather_results, rank, *, by_query=False): raise ValueError(f"Error: rank '{rank}' not available for aggregation.") for res in queryResult.summarized_lineage_results[rank]: - lineage = res.lineage.display_lineage(null_as_unclassified = True) + lineage = res.lineage.display_lineage(null_as_unclassified=True) if by_query: - lineage_summary[lineage][query_name] = res.fraction # v5?: res.f_weighted_at_rank + lineage_summary[lineage][ + query_name + ] = res.fraction # v5?: res.f_weighted_at_rank else: lineage_summary[lineage] += res.fraction @@ -794,21 +906,23 @@ def aggregate_by_lineage_at_rank(query_gather_results, rank, *, by_query=False): if not by_query: n_queries = len(all_queries) for lin, fraction in lineage_summary.items(): - lineage_summary[lin] = fraction/n_queries + lineage_summary[lin] = fraction / n_queries return lineage_summary, all_queries def format_for_krona(query_gather_results, rank, *, classification=False): - ''' + """ Aggregate and format for krona output. Single query recommended, but we don't want query headers. - ''' + """ # make header header = query_gather_results[0].make_krona_header(min_rank=rank) krona_results = [] # do we want to block more than one query for summarization? if len(query_gather_results) > 1: - notify('WARNING: results from more than one query found. Krona summarization not recommended.\n' \ - 'Percentage assignment will be normalized by the number of queries to maintain range 0-100%.') + notify( + "WARNING: results from more than one query found. Krona summarization not recommended.\n" + "Percentage assignment will be normalized by the number of queries to maintain range 0-100%." + ) if classification: # for classification, just write the results @@ -820,13 +934,17 @@ def format_for_krona(query_gather_results, rank, *, classification=False): # but also misleading, since we're using best_only and there may # be more matches that are not included here, making % unclassified seem higher than it would # be with summarization. We previously excluded it -- is that the behavior we want to keep? 
- krona_results.extend([q_res.krona_classified])#, q_res.krona_unclassified]) + krona_results.extend( + [q_res.krona_classified] + ) # , q_res.krona_unclassified]) else: - lineage_summary, _ = aggregate_by_lineage_at_rank(query_gather_results, rank, by_query=False) + lineage_summary, _ = aggregate_by_lineage_at_rank( + query_gather_results, rank, by_query=False + ) # sort by fraction lin_items = list(lineage_summary.items()) - lin_items.sort(key = lambda x: -x[1]) + lin_items.sort(key=lambda x: -x[1]) # reformat lineage for krona_results printing unclassified_fraction = 0 @@ -836,20 +954,20 @@ def format_for_krona(query_gather_results, rank, *, classification=False): unclassified_fraction = fraction continue else: - lin_list = lin.split(';') + lin_list = lin.split(";") krona_results.append((fraction, *lin_list)) # handle unclassified if unclassified_fraction: - len_unclassified_lin = len(header) -1 - unclassifed_lin = ["unclassified"]*len_unclassified_lin + len_unclassified_lin = len(header) - 1 + unclassifed_lin = ["unclassified"] * len_unclassified_lin krona_results.append((unclassified_fraction, *unclassifed_lin)) return krona_results, header -def write_krona(header, krona_results, out_fp, *, sep='\t'): - 'write krona output' +def write_krona(header, krona_results, out_fp, *, sep="\t"): + "write krona output" # CTB: do we want to optionally allow restriction to a specific rank # & above? NTP: think we originally kept krona to a specific rank, but # that may have been how we were plotting, since krona plots can be @@ -861,7 +979,7 @@ def write_krona(header, krona_results, out_fp, *, sep='\t'): tsv_output.writerow(res) -def write_output(header, results, out_fp, *, sep=',', write_header=True): +def write_output(header, results, out_fp, *, sep=",", write_header=True): """ write pre-generated results list of rows, with each row being a dictionary @@ -873,25 +991,34 @@ def write_output(header, results, out_fp, *, sep=',', write_header=True): output.writerow(res) -def write_bioboxes(header_lines, results, out_fp, *, sep='\t'): +def write_bioboxes(header_lines, results, out_fp, *, sep="\t"): """ write pre-generated results list of rows, with each row being list. """ for inf in header_lines: - out_fp.write(inf + '\n') + out_fp.write(inf + "\n") for res in results: - res = sep.join(res) + '\n' + res = sep.join(res) + "\n" out_fp.write(res) -def write_summary(query_gather_results, csv_fp, *, sep=',', limit_float_decimals=False, classification=False): - ''' +def write_summary( + query_gather_results, + csv_fp, + *, + sep=",", + limit_float_decimals=False, + classification=False, +): + """ Write taxonomy-summarized gather results for each rank. - ''' - w= None + """ + w = None for q_res in query_gather_results: - header, summary = q_res.make_full_summary(limit_float=limit_float_decimals, classification=classification) + header, summary = q_res.make_full_summary( + limit_float=limit_float_decimals, classification=classification + ) if w is None: w = csv.DictWriter(csv_fp, header, delimiter=sep) w.writeheader() @@ -899,29 +1026,41 @@ def write_summary(query_gather_results, csv_fp, *, sep=',', limit_float_decimals w.writerow(res) -def write_human_summary(query_gather_results, out_fp, display_rank, classification=False): - ''' +def write_human_summary( + query_gather_results, out_fp, display_rank, classification=False +): + """ Write human-readable taxonomy-summarized gather results for a specific rank. 
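+
+    Example output (illustrative values only):
+
+        sample name    proportion   cANI   lineage
+        -----------    ----------   ----   -------
+        HSMA33MX            5.7%    91.1%  d__Bacteria;p__Bacteroidota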
- ''' + """ for queryResult in query_gather_results: - results = queryResult.make_human_summary(display_rank=display_rank, classification=classification) + results = queryResult.make_human_summary( + display_rank=display_rank, classification=classification + ) if classification: out_fp.write("sample name status proportion cANI lineage\n") out_fp.write("----------- ------ ---------- ---- -------\n") for rD in results: - out_fp.write("{query_name:<15s} {status} {f_weighted_at_rank} {query_ani_at_rank} {lineage}\n".format(**rD)) + out_fp.write( + "{query_name:<15s} {status} {f_weighted_at_rank} {query_ani_at_rank} {lineage}\n".format( + **rD + ) + ) else: out_fp.write("sample name proportion cANI lineage\n") out_fp.write("----------- ---------- ---- -------\n") for rD in results: - out_fp.write("{query_name:<15s} {f_weighted_at_rank} {query_ani_at_rank} {lineage}\n".format(**rD)) + out_fp.write( + "{query_name:<15s} {f_weighted_at_rank} {query_ani_at_rank} {lineage}\n".format( + **rD + ) + ) -def write_lineage_sample_frac(sample_names, lineage_dict, out_fp, *, sep='\t'): - ''' +def write_lineage_sample_frac(sample_names, lineage_dict, out_fp, *, sep="\t"): + """ takes in a lineage dictionary with sample counts (output of aggregate_by_lineage_at_rank) and produces a tab-separated file with fractions for each sample. @@ -935,7 +1074,7 @@ def write_lineage_sample_frac(sample_names, lineage_dict, out_fp, *, sep='\t'): lin_a 0.4 0.17 0.6 lin_b 0.0 0.0 0.1 lin_c 0.3 0.4 0.2 - ''' + """ header = ["lineage"] + sample_names w = csv.DictWriter(out_fp, header, delimiter=sep) @@ -943,14 +1082,14 @@ def write_lineage_sample_frac(sample_names, lineage_dict, out_fp, *, sep='\t'): blank_row = {query_name: 0 for query_name in sample_names} unclassified_row = None for lin, sampleinfo in sorted(lineage_dict.items()): - #add lineage and 0 placeholders - row = {'lineage': lin} + # add lineage and 0 placeholders + row = {"lineage": lin} row.update(blank_row) # add info for query_names that exist for this lineage row.update(sampleinfo) # if unclassified, save this row for the end - if lin== "unclassified": - row.update({'lineage': 'unclassified'}) + if lin == "unclassified": + row.update({"lineage": "unclassified"}) unclassified_row = row continue # write row @@ -961,6 +1100,7 @@ def write_lineage_sample_frac(sample_names, lineage_dict, out_fp, *, sep='\t'): class LineageDB(abc.Mapping): "Base LineageDB class built around an assignments dictionary." + def __init__(self, assign_d, avail_ranks): self.assignments = assign_d self.available_ranks = set(avail_ranks) @@ -982,8 +1122,16 @@ def __bool__(self): return bool(self.assignments) @classmethod - def load(cls, filename, *, delimiter=',', force=False, - keep_full_identifiers=False, keep_identifier_versions=True, lins=False): + def load( + cls, + filename, + *, + delimiter=",", + force=False, + keep_full_identifiers=False, + keep_identifier_versions=True, + lins=False, + ): """ Load a taxonomy assignment CSV file into a LineageDB. @@ -993,9 +1141,11 @@ def load(cls, filename, *, delimiter=',', force=False, 'keep_identifier_versions=False' will remove trailing versions, e.g. 'IDENT.1' => 'IDENT'. 
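+
+        Example (an illustrative sketch; the filename is an assumption):
+
+            db = LineageDB.load("taxonomy.csv")
+            db["GCF_001881345.1"]   # -> the filled lineage, a tuple of LineagePairs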
""" - include_strain=False + include_strain = False if not keep_identifier_versions and keep_full_identifiers: - raise ValueError("keep_identifer_versions=False doesn't make sense with keep_full_identifiers=True") + raise ValueError( + "keep_identifer_versions=False doesn't make sense with keep_full_identifiers=True" + ) if not os.path.exists(filename): raise ValueError(f"'{filename}' does not exist") @@ -1006,42 +1156,46 @@ def load(cls, filename, *, delimiter=',', force=False, with sourmash_args.FileInputCSV(filename) as r: header = r.fieldnames if not header: - raise ValueError(f'cannot read taxonomy assignments from {filename}') + raise ValueError(f"cannot read taxonomy assignments from {filename}") identifier = "ident" # check for ident/identifier, handle some common alternatives if "ident" not in header: # check for ident/identifier, handle some common alternatives - if 'identifiers' in header: - identifier = 'identifiers' + if "identifiers" in header: + identifier = "identifiers" header = ["ident" if "identifiers" == x else x for x in header] - elif 'accession' in header: - identifier = 'accession' + elif "accession" in header: + identifier = "accession" header = ["ident" if "accession" == x else x for x in header] - elif 'name' in header and 'lineage' in header: - return cls.load_from_gather_with_lineages(filename, - force=force, - lins=lins) + elif "name" in header and "lineage" in header: + return cls.load_from_gather_with_lineages( + filename, force=force, lins=lins + ) else: header_str = ",".join([repr(x) for x in header]) - raise ValueError(f'No taxonomic identifiers found; headers are {header_str}') + raise ValueError( + f"No taxonomic identifiers found; headers are {header_str}" + ) if lins and "lin" not in header: - raise ValueError(f"'lin' column not found: cannot read LIN taxonomy assignments from {filename}.") + raise ValueError( + f"'lin' column not found: cannot read LIN taxonomy assignments from {filename}." + ) if not lins: # is "strain" an available rank? if "strain" in header: - include_strain=True + include_strain = True # check that all ranks are in header ranks = list(RankLineageInfo().taxlist) if not include_strain: - ranks.remove('strain') + ranks.remove("strain") if not set(ranks).issubset(header): # for now, just raise err if not all ranks are present. # in future, we can define `ranks` differently if desired # return them from this function so we can check the `available` ranks - raise ValueError('Not all taxonomy ranks present') + raise ValueError("Not all taxonomy ranks present") assignments = {} num_rows = 0 @@ -1053,13 +1207,17 @@ def load(cls, filename, *, delimiter=',', force=False, for n, row in enumerate(r): num_rows += 1 if lins: - lineageInfo = LINLineageInfo(lineage_str=row['lin']) + lineageInfo = LINLineageInfo(lineage_str=row["lin"]) if n_pos is not None: if lineageInfo.n_lin_positions != n_pos: - raise ValueError(f"For taxonomic summarization, all LIN assignments must use the same number of LIN positions.") + raise ValueError( + "For taxonomic summarization, all LIN assignments must use the same number of LIN positions." 
+ ) else: - n_pos = lineageInfo.n_lin_positions # set n_pos with first entry - ranks=lineageInfo.ranks + n_pos = ( + lineageInfo.n_lin_positions + ) # set n_pos with first entry + ranks = lineageInfo.ranks else: # read lineage from row dictionary lineageInfo = RankLineageInfo(lineage_dict=row) @@ -1067,9 +1225,11 @@ def load(cls, filename, *, delimiter=',', force=False, ident = row[identifier] # fold, spindle, and mutilate ident? - ident = get_ident(ident, - keep_full_identifiers=keep_full_identifiers, - keep_identifier_versions=keep_identifier_versions) + ident = get_ident( + ident, + keep_full_identifiers=keep_full_identifiers, + keep_identifier_versions=keep_identifier_versions, + ) # store lineage tuple lineage = lineageInfo.filled_lineage @@ -1078,27 +1238,27 @@ def load(cls, filename, *, delimiter=',', force=False, if ident in assignments: if assignments[ident] != lineage: if not force: - raise ValueError(f"multiple lineages for identifier {ident}") + raise ValueError( + f"multiple lineages for identifier {ident}" + ) else: assignments[ident] = lineage if not lins: - if lineage[-1].rank == 'species': + if lineage[-1].rank == "species": n_species += 1 - elif lineage[-1].rank == 'strain': + elif lineage[-1].rank == "strain": n_species += 1 n_strains += 1 return LineageDB(assignments, ranks) - @classmethod def load_from_gather_with_lineages(cls, filename, *, force=False, lins=False): """ Load an annotated gather-with-lineages CSV file produced by 'tax annotate' into a LineageDB. """ - include_strain = False if not os.path.exists(filename): raise ValueError(f"'{filename}' does not exist") @@ -1109,12 +1269,14 @@ def load_from_gather_with_lineages(cls, filename, *, force=False, lins=False): with sourmash_args.FileInputCSV(filename) as r: header = r.fieldnames if not header: - raise ValueError(f'cannot read taxonomy assignments from {filename}') + raise ValueError(f"cannot read taxonomy assignments from {filename}") if "name" not in header or "lineage" not in header: - raise ValueError(f"Expected headers 'name' and 'lineage' not found. Is this a with-lineages file?") + raise ValueError( + "Expected headers 'name' and 'lineage' not found. Is this a with-lineages file?" + ) - ranks=None + ranks = None assignments = {} num_rows = 0 n_species = 0 @@ -1124,13 +1286,13 @@ def load_from_gather_with_lineages(cls, filename, *, force=False, lins=False): for n, row in enumerate(r): num_rows += 1 - name = row['name'] + name = row["name"] ident = get_ident(name) if lins: - lineageInfo = LINLineageInfo(lineage_str=row['lineage']) + lineageInfo = LINLineageInfo(lineage_str=row["lineage"]) else: - lineageInfo = RankLineageInfo(lineage_str= row['lineage']) + lineageInfo = RankLineageInfo(lineage_str=row["lineage"]) if ranks is None: ranks = lineageInfo.taxlist @@ -1142,14 +1304,16 @@ def load_from_gather_with_lineages(cls, filename, *, force=False, lins=False): # this should not happen with valid # sourmash tax annotate output, but check anyway. if not force: - raise ValueError(f"multiple lineages for identifier {ident}") + raise ValueError( + f"multiple lineages for identifier {ident}" + ) else: assignments[ident] = lineage if isinstance(lineageInfo, RankLineageInfo): - if lineage[-1].rank == 'species': + if lineage[-1].rank == "species": n_species += 1 - elif lineage[-1].rank == 'strain': + elif lineage[-1].rank == "strain": n_species += 1 n_strains += 1 @@ -1160,10 +1324,19 @@ class LineageDB_Sqlite(abc.Mapping): """ A LineageDB based on a sqlite3 database with a 'sourmash_taxonomy' table. 
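+
+    Example (an illustrative sketch; the database path is an assumption):
+
+        db = LineageDB_Sqlite.load("taxonomy.sqlite")
+        db["GCF_001881345.1"]   # -> tuple of LineagePairs (superkingdom..strain)
+        len(db)                 # number of distinct identifiers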
""" + # NOTE: 'order' is a reserved name in sql, so we have to use 'order_'. - columns = ('superkingdom', 'phylum', 'order_', 'class', 'family', - 'genus', 'species', 'strain') - table_name = 'sourmash_taxonomy' + columns = ( + "superkingdom", + "phylum", + "order_", + "class", + "family", + "genus", + "species", + "strain", + ) + table_name = "sourmash_taxonomy" def __init__(self, conn, *, table_name=None): self.conn = conn @@ -1175,10 +1348,10 @@ def __init__(self, conn, *, table_name=None): # check that the right table is there. c = conn.cursor() try: - c.execute(f'SELECT * FROM {self.table_name} LIMIT 1') + c.execute(f"SELECT * FROM {self.table_name} LIMIT 1") except (sqlite3.DatabaseError, sqlite3.OperationalError): raise ValueError("not a taxonomy database") - + # check: can we do a 'select' on the right table? self.__len__() c = conn.cursor() @@ -1188,7 +1361,7 @@ def __init__(self, conn, *, table_name=None): for column, rank in zip(self.columns, RankLineageInfo().taxlist): query = f'SELECT COUNT({column}) FROM {self.table_name} WHERE {column} IS NOT NULL AND {column} != ""' c.execute(query) - cnt, = c.fetchone() + (cnt,) = c.fetchone() if cnt: ranks.add(rank) @@ -1209,16 +1382,16 @@ def load(cls, location): except sqlite3.OperationalError: info = {} - if 'SqliteLineage' in info: - if info['SqliteLineage'] != '1.0': + if "SqliteLineage" in info: + if info["SqliteLineage"] != "1.0": raise IndexNotSupported - table_name = 'sourmash_taxonomy' + table_name = "sourmash_taxonomy" else: # legacy support for old taxonomy DB, pre sourmash_internal. try: - c.execute('SELECT * FROM taxonomy LIMIT 1') - table_name = 'taxonomy' + c.execute("SELECT * FROM taxonomy LIMIT 1") + table_name = "taxonomy" except sqlite3.OperationalError: pass @@ -1229,13 +1402,16 @@ def load(cls, location): def _make_tup(self, row): "build a tuple of LineagePairs for this sqlite row" - tup = [ LineagePair(n, r) for (n, r) in zip(RankLineageInfo().taxlist, row) ] + tup = [LineagePair(n, r) for (n, r) in zip(RankLineageInfo().taxlist, row)] return tuple(tup) def __getitem__(self, ident): "Retrieve lineage for identifer" c = self.cursor - c.execute(f'SELECT superkingdom, phylum, class, order_, family, genus, species, strain FROM {self.table_name} WHERE ident=?', (ident,)) + c.execute( + f"SELECT superkingdom, phylum, class, order_, family, genus, species, strain FROM {self.table_name} WHERE ident=?", + (ident,), + ) # retrieve names list... 
names = c.fetchone() @@ -1256,24 +1432,26 @@ def __bool__(self): def __len__(self): "Return number of rows" c = self.conn.cursor() - c.execute(f'SELECT COUNT(DISTINCT ident) FROM {self.table_name}') - nrows, = c.fetchone() + c.execute(f"SELECT COUNT(DISTINCT ident) FROM {self.table_name}") + (nrows,) = c.fetchone() return nrows def __iter__(self): "Return all identifiers" # create new cursor so as to allow other operations c = self.conn.cursor() - c.execute(f'SELECT DISTINCT ident FROM {self.table_name}') + c.execute(f"SELECT DISTINCT ident FROM {self.table_name}") - for ident, in c: + for (ident,) in c: yield ident def items(self): "return all items in the sqlite database" c = self.conn.cursor() - c.execute(f'SELECT DISTINCT ident, superkingdom, phylum, class, order_, family, genus, species, strain FROM {self.table_name}') + c.execute( + f"SELECT DISTINCT ident, superkingdom, phylum, class, order_, family, genus, species, strain FROM {self.table_name}" + ) for ident, *names in c: yield ident, self._make_tup(names) @@ -1347,10 +1525,10 @@ def __len__(self): def __bool__(self): "True if any contained database has content." - return any( bool(db) for db in self.lineage_dbs ) + return any(bool(db) for db in self.lineage_dbs) def save(self, filename_or_fp, file_format): - assert file_format in ('sql', 'csv') + assert file_format in ("sql", "csv") is_filename = False try: @@ -1358,18 +1536,20 @@ def save(self, filename_or_fp, file_format): except AttributeError: is_filename = True - if file_format == 'sql': + if file_format == "sql": if not is_filename: - raise ValueError("file format '{file_format}' requires a filename, not a file handle") + raise ValueError( + "file format '{file_format}' requires a filename, not a file handle" + ) self._save_sqlite(filename_or_fp) - elif file_format == 'csv': + elif file_format == "csv": # we need a file handle; open file. fp = filename_or_fp if is_filename: - if filename_or_fp.endswith('.gz'): - fp = gzip.open(filename_or_fp, 'wt', newline="") + if filename_or_fp.endswith(".gz"): + fp = gzip.open(filename_or_fp, "wt", newline="") else: - fp = open(filename_or_fp, 'w', newline="") + fp = open(filename_or_fp, "w", newline="") try: self._save_csv(fp) @@ -1389,13 +1569,14 @@ def _save_sqlite(self, filename, *, conn=None): cursor = db.cursor() try: - sqlite_utils.add_sourmash_internal(cursor, 'SqliteLineage', '1.0') + sqlite_utils.add_sourmash_internal(cursor, "SqliteLineage", "1.0") except sqlite3.OperationalError: raise ValueError("attempt to write a readonly database") try: # CTB: could add 'IF NOT EXIST' here; would need tests, too. - cursor.execute(""" + cursor.execute( + """ CREATE TABLE sourmash_taxonomy ( ident TEXT NOT NULL, @@ -1408,49 +1589,54 @@ class TEXT, species TEXT, strain TEXT ) - """) - did_create = True + """ + ) except sqlite3.OperationalError: # already exists? 
raise ValueError(f"taxonomy table already exists in '{filename}'") # follow up and create index - cursor.execute("CREATE UNIQUE INDEX sourmash_taxonomy_ident ON sourmash_taxonomy(ident);") + cursor.execute( + "CREATE UNIQUE INDEX sourmash_taxonomy_ident ON sourmash_taxonomy(ident);" + ) for ident, tax in self.items(): - x = [ident, *[ t.name for t in tax ]] + x = [ident, *[t.name for t in tax]] # fill the taxonomy tuple with empty values until it's the # right length for the SQL statement - while len(x) < 9: - x.append('') + x.append("") - cursor.execute('INSERT INTO sourmash_taxonomy (ident, superkingdom, phylum, class, order_, family, genus, species, strain) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', x) + cursor.execute( + "INSERT INTO sourmash_taxonomy (ident, superkingdom, phylum, class, order_, family, genus, species, strain) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + x, + ) db.commit() def _save_csv(self, fp): - headers = ['identifiers'] + list(RankLineageInfo().taxlist) + headers = ["identifiers"] + list(RankLineageInfo().taxlist) w = csv.DictWriter(fp, fieldnames=headers) w.writeheader() for n, (ident, tax) in enumerate(self.items()): row = {} - row['identifiers'] = ident + row["identifiers"] = ident # convert tax LineagePairs into dictionary for t in tax: row[t.rank] = t.name # add strain if needed - if 'strain' not in row: - row['strain'] = '' + if "strain" not in row: + row["strain"] = "" w.writerow(row) @classmethod def load(cls, locations, **kwargs): "Load one or more taxonomies from the given location(s)" - force = kwargs.get('force', False) + force = kwargs.get("force", False) if isinstance(locations, str): raise TypeError("'locations' should be a list, not a string") @@ -1475,7 +1661,9 @@ def load(cls, locations, **kwargs): except (ValueError, csv.Error) as exc: # for the last loader, just pass along ValueError... if not force: - raise ValueError(f"cannot read taxonomy assignments from '{location}': {str(exc)}") + raise ValueError( + f"cannot read taxonomy assignments from '{location}': {str(exc)}" + ) # nothing loaded, goodbye! if not loaded and not force: @@ -1506,7 +1694,7 @@ class GatherRow: # essential columns query_name: str - name: str # match_name + name: str # match_name f_unique_weighted: float f_unique_to_query: float unique_intersect_bp: int @@ -1549,6 +1737,7 @@ class GatherRow: @dataclass class QueryInfo: "Class for storing query information" + query_name: str query_md5: str query_filename: str @@ -1564,7 +1753,9 @@ def __post_init__(self): self.ksize = int(self.ksize) self.scaled = int(self.scaled) self.query_n_hashes = int(self.query_n_hashes) if self.query_n_hashes else 0 - self.total_weighted_hashes = int(self.total_weighted_hashes) if self.total_weighted_hashes else 0 + self.total_weighted_hashes = ( + int(self.total_weighted_hashes) if self.total_weighted_hashes else 0 + ) @property def total_weighted_bp(self): @@ -1576,7 +1767,8 @@ class BaseTaxResult: """ Base class for sourmash taxonomic annotation. 
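+
+    Subclasses (TaxResult, AnnotateTaxResult below) parse a csv row; lineage
+    is then attached via, e.g. (illustrative):
+
+        taxres.get_match_lineage(tax_assignments=tax_assign)
+        taxres.lineageInfo   # RankLineageInfo, or LINLineageInfo when lins=True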
""" - raw: dict # csv row + + raw: dict # csv row keep_full_identifiers: bool = False keep_identifier_versions: bool = False match_ident: str = field(init=False) @@ -1594,29 +1786,32 @@ def get_ident(self, id_col=None): else: self.match_ident = self.raw.name if not self.keep_full_identifiers: - self.match_ident = self.match_ident.split(' ')[0] + self.match_ident = self.match_ident.split(" ")[0] else: - #overrides version bc can't keep full without keeping version + # overrides version bc can't keep full without keeping version self.keep_identifier_versions = True if not self.keep_identifier_versions: - self.match_ident = self.match_ident.split('.')[0] + self.match_ident = self.match_ident.split(".")[0] - - def get_match_lineage(self, tax_assignments, skip_idents=None, fail_on_missing_taxonomy=False): + def get_match_lineage( + self, tax_assignments, skip_idents=None, fail_on_missing_taxonomy=False + ): if skip_idents and self.match_ident in skip_idents: self.skipped_ident = True else: lin = tax_assignments.get(self.match_ident) if lin: if self.lins: - self.lineageInfo = LINLineageInfo(lineage = lin) + self.lineageInfo = LINLineageInfo(lineage=lin) else: - self.lineageInfo = RankLineageInfo(lineage = lin) + self.lineageInfo = RankLineageInfo(lineage=lin) else: - self.missed_ident=True + self.missed_ident = True self.match_lineage_attempted = True if self.missed_ident and fail_on_missing_taxonomy: - raise ValueError(f"Error: ident '{self.match_ident}' is not in the taxonomy database. Failing, as requested via --fail-on-missing-taxonomy") + raise ValueError( + f"Error: ident '{self.match_ident}' is not in the taxonomy database. Failing, as requested via --fail-on-missing-taxonomy" + ) @dataclass @@ -1624,7 +1819,8 @@ class AnnotateTaxResult(BaseTaxResult): """ Class to enable taxonomic annotation of any sourmash CSV. """ - id_col: str = 'name' + + id_col: str = "name" def __post_init__(self): if self.id_col not in self.raw.keys(): @@ -1667,22 +1863,24 @@ class TaxResult(BaseTaxResult): Use RankLineageInfo or LINLineageInfo to store lineage information. """ + raw: GatherRow query_name: str = field(init=False) query_info: QueryInfo = field(init=False) def __post_init__(self): self.get_ident() - self.query_name = self.raw.query_name # convenience - self.query_info = QueryInfo(query_name = self.raw.query_name, - query_md5=self.raw.query_md5, - query_filename = self.raw.query_filename, - query_bp = self.raw.query_bp, - query_n_hashes = self.raw.query_n_hashes, - total_weighted_hashes = self.raw.total_weighted_hashes, - ksize = self.raw.ksize, - scaled = self.raw.scaled - ) + self.query_name = self.raw.query_name # convenience + self.query_info = QueryInfo( + query_name=self.raw.query_name, + query_md5=self.raw.query_md5, + query_filename=self.raw.query_filename, + query_bp=self.raw.query_bp, + query_n_hashes=self.raw.query_n_hashes, + total_weighted_hashes=self.raw.total_weighted_hashes, + ksize=self.raw.ksize, + scaled=self.raw.scaled, + ) # cast and store the imp bits self.f_unique_to_query = float(self.raw.f_unique_to_query) self.f_unique_weighted = float(self.raw.f_unique_weighted) @@ -1701,6 +1899,7 @@ class SummarizedGatherResult: Methods included for returning formatted results for different outputs. """ + rank: str fraction: float lineage: RankLineageInfo @@ -1713,23 +1912,32 @@ def __post_init__(self): def check_values(self): if any([self.fraction > 1, self.f_weighted_at_rank > 1]): - raise ValueError(f"Summarized fraction is > 100% of the query! This should not be possible. 
Please check that your input files come directly from a single gather run per query.") + raise ValueError( + "Summarized fraction is > 100% of the query! This should not be possible. Please check that your input files come directly from a single gather run per query." + ) # is this true for weighted too, or is that set to 0 when --ignore-abundance is used? - if any([self.fraction <=0, self.f_weighted_at_rank <= 0]): # this shouldn't actually happen, but it breaks ANI estimation, so let's check for it. - raise ValueError(f"Summarized fraction is <=0% of the query! This should not occur.") + if any( + [self.fraction <= 0, self.f_weighted_at_rank <= 0] + ): # this shouldn't actually happen, but it breaks ANI estimation, so let's check for it. + raise ValueError( + "Summarized fraction is <=0% of the query! This should not occur." + ) def set_query_ani(self, query_info): - self.query_ani_at_rank = containment_to_distance(self.fraction, query_info.ksize, query_info.scaled, - n_unique_kmers=query_info.query_n_hashes, - sequence_len_bp=query_info.query_bp).ani - + self.query_ani_at_rank = containment_to_distance( + self.fraction, + query_info.ksize, + query_info.scaled, + n_unique_kmers=query_info.query_n_hashes, + sequence_len_bp=query_info.query_bp, + ).ani def as_lineage_dict(self, query_info, ranks): - ''' + """ Format to dict for writing lineage-CSV file suitable for use with sourmash tax ... -t. - ''' + """ lD = {} - lD['ident'] = query_info.query_name + lD["ident"] = query_info.query_name for rank in ranks: lin_name = self.lineage.name_at_rank(rank) if lin_name is None: @@ -1739,52 +1947,54 @@ def as_lineage_dict(self, query_info, ranks): def as_summary_dict(self, query_info, limit_float=False): sD = asdict(self) - sD['lineage'] = self.lineage.display_lineage(null_as_unclassified=True) - sD['query_name'] = query_info.query_name - sD['query_md5'] = query_info.query_md5 - sD['query_filename'] = query_info.query_filename - sD['total_weighted_hashes'] = str(query_info.total_weighted_hashes) - sD['bp_match_at_rank'] = str(self.bp_match_at_rank) + sD["lineage"] = self.lineage.display_lineage(null_as_unclassified=True) + sD["query_name"] = query_info.query_name + sD["query_md5"] = query_info.query_md5 + sD["query_filename"] = query_info.query_filename + sD["total_weighted_hashes"] = str(query_info.total_weighted_hashes) + sD["bp_match_at_rank"] = str(self.bp_match_at_rank) if limit_float: - sD['fraction'] = f'{self.fraction:.3f}' - sD['f_weighted_at_rank'] = f'{self.f_weighted_at_rank:.3f}' + sD["fraction"] = f"{self.fraction:.3f}" + sD["f_weighted_at_rank"] = f"{self.f_weighted_at_rank:.3f}" if self.query_ani_at_rank: - sD['query_ani_at_rank'] = f'{self.query_ani_at_rank:.3f}' + sD["query_ani_at_rank"] = f"{self.query_ani_at_rank:.3f}" else: - sD['fraction'] = str(self.fraction) - sD['f_weighted_at_rank'] = str(self.f_weighted_at_rank) + sD["fraction"] = str(self.fraction) + sD["f_weighted_at_rank"] = str(self.f_weighted_at_rank) - return(sD) + return sD def as_human_friendly_dict(self, query_info): sD = self.as_summary_dict(query_info=query_info, limit_float=True) - sD['f_weighted_at_rank'] = f"{self.f_weighted_at_rank*100:>4.1f}%" + sD["f_weighted_at_rank"] = f"{self.f_weighted_at_rank*100:>4.1f}%" if self.query_ani_at_rank is not None: - sD['query_ani_at_rank'] = f"{self.query_ani_at_rank*100:>3.1f}%" + sD["query_ani_at_rank"] = f"{self.query_ani_at_rank*100:>3.1f}%" else: - sD['query_ani_at_rank'] = '- ' + sD["query_ani_at_rank"] = "- " return sD def as_kreport_dict(self, query_info): """ 
Produce kreport dict for named taxonomic groups. """ - lowest_assignment_rank = 'species' + lowest_assignment_rank = "species" sD = {} - sD['num_bp_assigned'] = str(0) - sD['ncbi_taxid'] = None + sD["num_bp_assigned"] = str(0) + sD["ncbi_taxid"] = None # total percent containment, weighted to include abundance info - sD['percent_containment'] = f'{self.f_weighted_at_rank * 100:.2f}' - sD["num_bp_contained"] = str(int(self.f_weighted_at_rank * query_info.total_weighted_bp)) + sD["percent_containment"] = f"{self.f_weighted_at_rank * 100:.2f}" + sD["num_bp_contained"] = str( + int(self.f_weighted_at_rank * query_info.total_weighted_bp) + ) if isinstance(self.lineage, LINLineageInfo): raise ValueError("Cannot produce 'kreport' with LIN taxonomy.") if self.lineage != RankLineageInfo(): this_rank = self.lineage.lowest_rank - sD['rank_code'] = RANKCODE[this_rank] - sD['sci_name'] = self.lineage.lowest_lineage_name + sD["rank_code"] = RANKCODE[this_rank] + sD["sci_name"] = self.lineage.lowest_lineage_name taxid = self.lineage.lowest_lineage_taxid if taxid: - sD['ncbi_taxid'] = str(taxid) + sD["ncbi_taxid"] = str(taxid) # the number of bp actually 'assigned' at this rank. Sourmash assigns everything # at genome level, but since kreport traditionally doesn't include 'strain' or genome, # it is reasonable to state that sourmash assigns at 'species' level for this. @@ -1792,19 +2002,21 @@ def as_kreport_dict(self, query_info): if this_rank == lowest_assignment_rank: sD["num_bp_assigned"] = sD["num_bp_contained"] else: - sD['sci_name'] = 'unclassified' - sD['rank_code'] = RANKCODE['unclassified'] + sD["sci_name"] = "unclassified" + sD["rank_code"] = RANKCODE["unclassified"] sD["num_bp_assigned"] = sD["num_bp_contained"] return sD - + def as_lingroup_dict(self, query_info, lg_name): """ Produce lingroup report dict for lingroups. """ sD = {} # total percent containment, weighted to include abundance info - sD['percent_containment'] = f'{self.f_weighted_at_rank * 100:.2f}' - sD["num_bp_contained"] = str(int(self.f_weighted_at_rank * query_info.total_weighted_bp)) + sD["percent_containment"] = f"{self.f_weighted_at_rank * 100:.2f}" + sD["num_bp_contained"] = str( + int(self.f_weighted_at_rank * query_info.total_weighted_bp) + ) sD["lin"] = self.lineage.display_lineage() sD["name"] = lg_name return sD @@ -1814,11 +2026,11 @@ def as_cami_bioboxes(self): Format taxonomy-summarized gather results as CAMI profiling Bioboxes format. - Columns are: TAXID RANK TAXPATH TAXPATHSN PERCENTAGE + Columns are: TAXID RANK TAXPATH TAXPATHSN PERCENTAGE """ if isinstance(self.lineage, LINLineageInfo): raise ValueError("Cannot produce 'bioboxes' with LIN taxonomy.") - if self.lineage != RankLineageInfo(): # if not unassigned + if self.lineage != RankLineageInfo(): # if not unassigned taxid = self.lineage.lowest_lineage_taxid if taxid: taxpath = self.lineage.display_taxid(sep="|") @@ -1826,7 +2038,9 @@ def as_cami_bioboxes(self): else: taxpath = None taxpathsn = self.lineage.display_lineage(sep="|") - percentage = f"{(self.f_weighted_at_rank * 100):.2f}" # fix at 2 decimal points + percentage = ( + f"{(self.f_weighted_at_rank * 100):.2f}" # fix at 2 decimal points + ) return [taxid, self.rank, taxpath, taxpathsn, percentage] return [] @@ -1842,39 +2056,47 @@ class ClassificationResult(SummarizedGatherResult): Methods included for returning formatted results for different outputs. 
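+
+    Example (an illustrative sketch; query_info and lin are built elsewhere):
+
+        classif = ClassificationResult(rank="species", fraction=0.23, lineage=lin,
+                                       f_weighted_at_rank=0.31, bp_match_at_rank=2000000)
+        classif.set_status(query_info, containment_threshold=0.1)
+        classif.status   # "match", since fraction >= containment_threshold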
""" + "Class for storing query classification information" status: str = field(init=False) def __post_init__(self): # check for out of bounds values, default "nomatch" if no match at all self.check_values() - self.status = 'nomatch' #None? + self.status = "nomatch" # None? def set_status(self, query_info, containment_threshold=None, ani_threshold=None): # if any matches, use 'below_threshold' as default; set 'match' if meets threshold if any([containment_threshold is not None, ani_threshold is not None]): - self.status="below_threshold" + self.status = "below_threshold" self.set_query_ani(query_info=query_info) - if ani_threshold is not None: # if provided, just use ani thresh, don't use containment threshold + if ( + ani_threshold is not None + ): # if provided, just use ani thresh, don't use containment threshold if self.query_ani_at_rank >= ani_threshold: - self.status = 'match' + self.status = "match" # v5?: switch to using self.f_weighted_at_rank here - elif containment_threshold is not None and self.fraction >= containment_threshold: - self.status = 'match' + elif ( + containment_threshold is not None and self.fraction >= containment_threshold + ): + self.status = "match" def build_krona_result(self, rank=None): krona_classified, krona_unclassified = None, None if rank is not None and rank == self.rank: - lin_as_list = self.lineage.display_lineage().split(';') - krona_classification = (self.fraction, *lin_as_list) # v5?: f_weighted_at_rank - krona_classified = (krona_classification) + lin_as_list = self.lineage.display_lineage().split(";") + krona_classification = ( + self.fraction, + *lin_as_list, + ) # v5?: f_weighted_at_rank + krona_classified = krona_classification # handle unclassified - do we want/need this? - unclassified_fraction= 1.0-self.fraction #v5?: f_weighted_at_rank + unclassified_fraction = 1.0 - self.fraction # v5?: f_weighted_at_rank len_unclassified_lin = len(lin_as_list) - unclassifed_lin = ["unclassified"]*(len_unclassified_lin) + unclassifed_lin = ["unclassified"] * (len_unclassified_lin) krona_unclassified = (unclassified_fraction, *unclassifed_lin) return krona_classified, krona_unclassified - + @dataclass class QueryTaxResult: @@ -1887,11 +2109,12 @@ class QueryTaxResult: Contains methods for formatting results for different outputs. 
""" - query_info: QueryInfo # initialize with QueryInfo dataclass + + query_info: QueryInfo # initialize with QueryInfo dataclass lins: bool = False def __post_init__(self): - self.query_name = self.query_info.query_name # for convenience + self.query_name = self.query_info.query_name # for convenience self._init_taxresult_vars() self._init_summarization_vars() self._init_classification_results() @@ -1899,7 +2122,7 @@ def __post_init__(self): def _init_taxresult_vars(self): self.ranks = [] self.raw_taxresults = [] - self.skipped_idents= set() + self.skipped_idents = set() self.missed_idents = set() self.n_missed = 0 self.n_skipped = 0 @@ -1913,13 +2136,13 @@ def _init_summarization_vars(self): self._init_summarization_results() def _init_summarization_results(self): - self.total_f_weighted = defaultdict(float) #0.0 - self.total_f_classified = defaultdict(float)#0.0 - self.total_bp_classified = defaultdict(int) #0 + self.total_f_weighted = defaultdict(float) # 0.0 + self.total_f_classified = defaultdict(float) # 0.0 + self.total_bp_classified = defaultdict(int) # 0 self.summarized_lineage_results = defaultdict(list) def _init_classification_results(self): - self.status = 'nomatch' + self.status = "nomatch" self.classified_ranks = [] self.classification_result = None self.krona_classified = None @@ -1940,76 +2163,114 @@ def add_taxresult(self, taxresult): # check that all query parameters match if self.is_compatible(taxresult=taxresult): if not taxresult.match_lineage_attempted: - raise ValueError("Error: Cannot add TaxResult. Please use get_match_lineage() to add taxonomic lineage information first.") + raise ValueError( + "Error: Cannot add TaxResult. Please use get_match_lineage() to add taxonomic lineage information first." + ) if not self.ranks: self.ranks = taxresult.lineageInfo.ranks if taxresult.skipped_ident: - self.n_skipped +=1 + self.n_skipped += 1 self.skipped_idents.add(taxresult.match_ident) elif taxresult.missed_ident: - self.n_missed +=1 + self.n_missed += 1 self.missed_idents.add(taxresult.match_ident) self.raw_taxresults.append(taxresult) else: - raise ValueError("Error: Cannot add TaxResult: query information does not match.") + raise ValueError( + "Error: Cannot add TaxResult: query information does not match." + ) def summarize_up_ranks(self, single_rank=None, force_resummarize=False): - if self.summarized_ranks: # has already been summarized + if self.summarized_ranks: # has already been summarized if force_resummarize: self._init_summarization_vars() else: - raise ValueError("Error: already summarized using rank(s): '{', '.join(self.summarized_ranks)}'. Use 'force_resummarize=True' to reset and resummarize") + raise ValueError( + "Error: already summarized using rank(s): '{', '.join(self.summarized_ranks)}'. 
Use 'force_resummarize=True' to reset and resummarize" + ) # set ranks levels to summarize self.summarized_ranks = self.ascending_ranks if single_rank: if single_rank not in self.summarized_ranks: - raise ValueError(f"Error: rank '{single_rank}' not in available ranks ({', '.join(self.summarized_ranks)})") + raise ValueError( + f"Error: rank '{single_rank}' not in available ranks ({', '.join(self.summarized_ranks)})" + ) self.summarized_ranks = [single_rank] - notify(f"Starting summarization up rank(s): {', '.join(self.summarized_ranks)} ") + notify( + f"Starting summarization up rank(s): {', '.join(self.summarized_ranks)} " + ) for taxres in self.raw_taxresults: lininfo = taxres.lineageInfo - if lininfo and lininfo.filled_lineage: # won't always have lineage to summarize (skipped idents, missed idents) + if ( + lininfo and lininfo.filled_lineage + ): # won't always have lineage to summarize (skipped idents, missed idents) # notify + track perfect matches if taxres.f_unique_to_query >= 1.0: if taxres.match_ident not in self.perfect_match: - notify(f"WARNING: 100% match! Is query '{self.query_name}' identical to its database match, '{taxres.match_ident}'?") + notify( + f"WARNING: 100% match! Is query '{self.query_name}' identical to its database match, '{taxres.match_ident}'?" + ) self.perfect_match.add(taxres.match_ident) # add this taxresult to summary for rank in self.summarized_ranks: - if rank in lininfo.filled_ranks: # only store if this rank is filled. + if ( + rank in lininfo.filled_ranks + ): # only store if this rank is filled. lin_at_rank = lininfo.pop_to_rank(rank) - self.sum_uniq_weighted[rank][lin_at_rank] += taxres.f_unique_weighted - self.sum_uniq_to_query[rank][lin_at_rank] += taxres.f_unique_to_query - self.sum_uniq_bp[rank][lin_at_rank] += taxres.unique_intersect_bp + self.sum_uniq_weighted[rank][ + lin_at_rank + ] += taxres.f_unique_weighted + self.sum_uniq_to_query[rank][ + lin_at_rank + ] += taxres.f_unique_to_query + self.sum_uniq_bp[rank][ + lin_at_rank + ] += taxres.unique_intersect_bp # reset ranks levels to the ones that were actually summarized + that we can access for summarized result - self.summarized_ranks = [x for x in self.summarized_ranks if x in self.sum_uniq_bp.keys()] + self.summarized_ranks = [ + x for x in self.summarized_ranks if x in self.sum_uniq_bp.keys() + ] if single_rank and single_rank not in self.summarized_ranks: - raise ValueError(f"Error: rank '{single_rank}' was not available for any matching lineages.") + raise ValueError( + f"Error: rank '{single_rank}' was not available for any matching lineages." + ) def build_summarized_result(self, single_rank=None, force_resummarize=False): # just reset if we've already built summarized result (avoid adding to existing)? Or write in an error/force option? 
self._init_summarization_results() # if taxresults haven't been summarized, do that first if not self.summarized_ranks or force_resummarize: - self.summarize_up_ranks(single_rank=single_rank, force_resummarize=force_resummarize) + self.summarize_up_ranks( + single_rank=single_rank, force_resummarize=force_resummarize + ) # catch potential error from running summarize_up_ranks separately and passing in different single_rank if single_rank and single_rank not in self.summarized_ranks: - raise ValueError(f"Error: rank '{single_rank}' not in summarized rank(s), {','.join(self.summarized_ranks)}") + raise ValueError( + f"Error: rank '{single_rank}' not in summarized rank(s), {','.join(self.summarized_ranks)}" + ) # rank loop is currently done in __main__ - for rank in self.summarized_ranks[::-1]: # reverse so that results are in descending order - sum_uniq_to_query = self.sum_uniq_to_query[rank] #should be lineage: value + for rank in self.summarized_ranks[ + ::-1 + ]: # reverse so that results are in descending order + sum_uniq_to_query = self.sum_uniq_to_query[rank] # should be lineage: value # first, sort sorted_sum_uniq_to_query = list(sum_uniq_to_query.items()) - sorted_sum_uniq_to_query.sort(key = lambda x: -x[1]) + sorted_sum_uniq_to_query.sort(key=lambda x: -x[1]) for lineage, f_unique in sorted_sum_uniq_to_query: # does this ever happen? do we need it? - if f_unique == 0: #no annotated results for this query. do we need to handle this differently now? + if ( + f_unique == 0 + ): # no annotated results for this query. do we need to handle this differently now? continue f_weighted_at_rank = self.sum_uniq_weighted[rank][lineage] bp_intersect_at_rank = self.sum_uniq_bp[rank][lineage] - sres = SummarizedGatherResult(lineage=lineage, rank=rank, - f_weighted_at_rank=f_weighted_at_rank, fraction=f_unique, - bp_match_at_rank=bp_intersect_at_rank) + sres = SummarizedGatherResult( + lineage=lineage, + rank=rank, + f_weighted_at_rank=f_weighted_at_rank, + fraction=f_unique, + bp_match_at_rank=bp_intersect_at_rank, + ) sres.set_query_ani(query_info=self.query_info) self.summarized_lineage_results[rank].append(sres) @@ -2028,43 +2289,69 @@ def build_summarized_result(self, single_rank=None, force_resummarize=False): f_unique = 1.0 - self.total_f_classified[rank] if f_unique > 0: f_weighted_at_rank = 1.0 - self.total_f_weighted[rank] - bp_intersect_at_rank = self.query_info.query_bp - self.total_bp_classified[rank] - sres = SummarizedGatherResult(lineage=lineage, rank=rank, f_weighted_at_rank=f_weighted_at_rank, - fraction=f_unique, bp_match_at_rank=bp_intersect_at_rank, query_ani_at_rank=query_ani) + bp_intersect_at_rank = ( + self.query_info.query_bp - self.total_bp_classified[rank] + ) + sres = SummarizedGatherResult( + lineage=lineage, + rank=rank, + f_weighted_at_rank=f_weighted_at_rank, + fraction=f_unique, + bp_match_at_rank=bp_intersect_at_rank, + query_ani_at_rank=query_ani, + ) self.summarized_lineage_results[rank].append(sres) - def build_classification_result(self, rank=None, ani_threshold=None, containment_threshold=0.1, force_resummarize=False, lingroup_ranks=None, lingroups=None): + def build_classification_result( + self, + rank=None, + ani_threshold=None, + containment_threshold=0.1, + force_resummarize=False, + lingroup_ranks=None, + lingroups=None, + ): if containment_threshold is not None and not 0 <= containment_threshold <= 1: - raise ValueError(f"Containment threshold must be between 0 and 1 (input value: {containment_threshold}).") + raise ValueError( + f"Containment threshold 
must be between 0 and 1 (input value: {containment_threshold})." + ) if ani_threshold is not None and not 0 <= ani_threshold <= 1: - raise ValueError(f"ANI threshold must be between 0 and 1 (input value: {ani_threshold}).") - self._init_classification_results() # init some fields + raise ValueError( + f"ANI threshold must be between 0 and 1 (input value: {ani_threshold})." + ) + self._init_classification_results() # init some fields if not self.summarized_ranks or force_resummarize: - self.summarize_up_ranks(single_rank=rank, force_resummarize=force_resummarize) + self.summarize_up_ranks( + single_rank=rank, force_resummarize=force_resummarize + ) # catch potential error from running summarize_up_ranks separately and passing in different single_rank self.classified_ranks = self.summarized_ranks # if a rank is provided, we need to classify ONLY using that rank if rank: if rank not in self.summarized_ranks: - raise ValueError(f"Error: rank '{rank}' not in summarized rank(s), {','.join(self.summarized_ranks)}") + raise ValueError( + f"Error: rank '{rank}' not in summarized rank(s), {','.join(self.summarized_ranks)}" + ) else: self.classified_ranks = [rank] if lingroup_ranks: notify("Restricting classification to lingroups.") - self.classified_ranks = [x for x in self.classified_ranks if x in lingroup_ranks] + self.classified_ranks = [ + x for x in self.classified_ranks if x in lingroup_ranks + ] if not self.classified_ranks: - raise ValueError(f"Error: no ranks remain for classification.") + raise ValueError("Error: no ranks remain for classification.") # CLASSIFY using summarization--> best only result. Best way = use ANI or containment threshold classif = None - for this_rank in self.classified_ranks: # ascending order or just single rank + for this_rank in self.classified_ranks: # ascending order or just single rank # reset for this rank - f_weighted=0.0 - f_unique_at_rank=0.0 - bp_intersect_at_rank=0 + f_weighted = 0.0 + f_unique_at_rank = 0.0 + bp_intersect_at_rank = 0 sum_uniq_to_query = self.sum_uniq_to_query[this_rank] # sort the results and grab best sorted_sum_uniq_to_query = list(sum_uniq_to_query.items()) - sorted_sum_uniq_to_query.sort(key = lambda x: -x[1]) + sorted_sum_uniq_to_query.sort(key=lambda x: -x[1]) # select best-at-rank only this_lineage, f_unique_at_rank = sorted_sum_uniq_to_query[0] # if in desired lineage groups, continue (or??) @@ -2074,19 +2361,33 @@ def build_classification_result(self, rank=None, ani_threshold=None, containment bp_intersect_at_rank = self.sum_uniq_bp[this_rank][this_lineage] f_weighted = self.sum_uniq_weighted[this_rank][this_lineage] - classif = ClassificationResult(rank=this_rank, fraction=f_unique_at_rank, lineage=this_lineage, - f_weighted_at_rank=f_weighted, bp_match_at_rank=bp_intersect_at_rank) - - classif.set_status(self.query_info, containment_threshold=containment_threshold, ani_threshold=ani_threshold) + classif = ClassificationResult( + rank=this_rank, + fraction=f_unique_at_rank, + lineage=this_lineage, + f_weighted_at_rank=f_weighted, + bp_match_at_rank=bp_intersect_at_rank, + ) + + classif.set_status( + self.query_info, + containment_threshold=containment_threshold, + ani_threshold=ani_threshold, + ) # determine whether to move on to a higher tax rank (if avail) - if classif.status == 'match' or classif.status == "nomatch": # not sure we want/need the `nomatch` part... + if ( + classif.status == "match" or classif.status == "nomatch" + ): # not sure we want/need the `nomatch` part... 
break

        # store the final classification result
        self.classification_result = classif

        # could do this later, in __main__.py, for example
-        self.krona_classified, self.krona_unclassified = self.classification_result.build_krona_result(rank=rank)
-        self.krona_header = self.make_krona_header(min_rank = rank)
+        (
+            self.krona_classified,
+            self.krona_unclassified,
+        ) = self.classification_result.build_krona_result(rank=rank)
+        self.krona_header = self.make_krona_header(min_rank=rank)

     def make_krona_header(self, min_rank):
         "make header for krona output"
@@ -2096,7 +2397,7 @@ def make_krona_header(self, min_rank):
             raise ValueError(f"Rank '{min_rank}' not present in summarized ranks.")
         else:
             rank_index = self.ranks.index(min_rank)
-        return ["fraction"] + list(self.ranks[:rank_index+1])
+        return ["fraction"] + list(self.ranks[: rank_index + 1])

     def check_classification(self):
         if not self.classification_result:
@@ -2125,41 +2426,65 @@ def make_full_summary(self, classification=False, limit_float=False):
         rD = {}
         if classification:
             self.check_classification()
-            header= ["query_name", "status", "rank", "fraction", "lineage",
-                    "query_md5", "query_filename", "f_weighted_at_rank",
-                    "bp_match_at_rank", "query_ani_at_rank"]
-            rD = self.classification_result.as_summary_dict(query_info = self.query_info, limit_float=limit_float)
-            del rD['total_weighted_hashes']
+            header = [
+                "query_name",
+                "status",
+                "rank",
+                "fraction",
+                "lineage",
+                "query_md5",
+                "query_filename",
+                "f_weighted_at_rank",
+                "bp_match_at_rank",
+                "query_ani_at_rank",
+            ]
+            rD = self.classification_result.as_summary_dict(
+                query_info=self.query_info, limit_float=limit_float
+            )
+            del rD["total_weighted_hashes"]
             results.append(rD)
         else:
             self.check_summarization()
-            header= ["query_name", "rank", "fraction", "lineage", "query_md5",
-                    "query_filename", "f_weighted_at_rank", "bp_match_at_rank",
-                    "query_ani_at_rank", "total_weighted_hashes"]
-
-            for rank in self.summarized_ranks[::-1]: #descending
-                unclassified=[]
+            header = [
+                "query_name",
+                "rank",
+                "fraction",
+                "lineage",
+                "query_md5",
+                "query_filename",
+                "f_weighted_at_rank",
+                "bp_match_at_rank",
+                "query_ani_at_rank",
+                "total_weighted_hashes",
+            ]
+
+            for rank in self.summarized_ranks[::-1]:  # descending
+                unclassified = []
                 rank_results = self.summarized_lineage_results[rank]
-                rank_results.sort(key=lambda res: -res.fraction) #v5?: f_weighted_at_rank)
+                rank_results.sort(
+                    key=lambda res: -res.fraction
+                )  # v5?: f_weighted_at_rank
                 for res in rank_results:
-                    rD = res.as_summary_dict(query_info=self.query_info, limit_float=limit_float)
+                    rD = res.as_summary_dict(
+                        query_info=self.query_info, limit_float=limit_float
                    )
                     # save unclassified for the end
-                    if rD['lineage'] == "unclassified":
+                    if rD["lineage"] == "unclassified":
                         unclassified.append(rD)
                     else:
                         results.append(rD)
-            results +=unclassified
+            results += unclassified
         return header, results

     def make_kreport_results(self):
-        '''
+        """
         Format taxonomy-summarized gather results as kraken-style kreport.

         STANDARD KREPORT FORMAT:
         - `Percent Reads Contained in Taxon`: The cumulative percentage of reads for this taxon and all descendants.
         - `Number of Reads Contained in Taxon`: The cumulative number of reads for this taxon and all descendants.
         - `Number of Reads Assigned to Taxon`: The number of reads assigned directly to this taxon (not a cumulative count of all descendants).
-        - `Rank Code`: (U)nclassified, (R)oot, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies. 
+        - `Rank Code`: (U)nclassified, (R)oot, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies.
         - `NCBI Taxon ID`: Numerical ID from the NCBI taxonomy database.
         - `Scientific Name`: The scientific name of the taxon.
@@ -2191,30 +2516,43 @@ def make_kreport_results(self):
         - `Percent Contained in Taxon`: Percent of all base pairs contained by this taxon (weighted by abundance if tracked)
         - `Estimated base pairs Contained in Taxon`: Number of base pairs contained by this taxon (weighted by abundance if tracked)
         - `Estimated base pairs Assigned to Taxon`: Number of base pairs at species-level (weighted by abundance if tracked)
-        - `Rank Code`: (U)nclassified, (R)oot, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies. 
+        - `Rank Code`: (U)nclassified, (R)oot, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies.
         - `NCBI Taxon ID` will not be reported (blank entries).
         - `Scientific Name`: The scientific name of the taxon.

         In the future, we may wish to report the NCBI taxid when we can (NCBI taxonomy only).
-        '''
+        """
         self.check_summarization()
-        header = ["percent_containment", "num_bp_contained", "num_bp_assigned", "rank_code", "ncbi_taxid", "sci_name"]
+        header = [
+            "percent_containment",
+            "num_bp_contained",
+            "num_bp_assigned",
+            "rank_code",
+            "ncbi_taxid",
+            "sci_name",
+        ]
         if self.query_info.total_weighted_hashes == 0:
-            raise ValueError("ERROR: cannot produce 'kreport' format from gather results before sourmash v4.5.0")
+            raise ValueError(
+                "ERROR: cannot produce 'kreport' format from gather results before sourmash v4.5.0"
+            )
         required_ranks = set(RANKCODE.keys())
-        acceptable_ranks = list(self.ranks) + ['unclassified', 'kingdom']
+        acceptable_ranks = list(self.ranks) + ["unclassified", "kingdom"]
         if not required_ranks.issubset(set(acceptable_ranks)):
-            raise ValueError("ERROR: cannot produce 'kreport' format from ranks {', '.join(self.ranks)}")
+            raise ValueError(
+                f"ERROR: cannot produce 'kreport' format from ranks {', '.join(self.ranks)}"
+            )
         kreport_results = []
-        unclassified_recorded=False
+        unclassified_recorded = False
         # want to order results descending by rank
         for rank in self.ranks:
-            if rank == 'strain': # no code for strain, can't include in this output afaik
+            if (
+                rank == "strain"
+            ):  # no code for strain, can't include in this output afaik
                 continue
             rank_results = self.summarized_lineage_results[rank]
             for res in rank_results:
                 kresD = res.as_kreport_dict(self.query_info)
-                if kresD['sci_name'] == "unclassified":
+                if kresD["sci_name"] == "unclassified":
                     # SummarizedGatherResults have an unclassified lineage at every rank, to facilitate reporting at a specific rank.
                     # Here, we only need to report it once, since it will be the same fraction for all ranks
                     if unclassified_recorded:
@@ -2224,7 +2562,9 @@ def make_kreport_results(self):
             kreport_results.append(kresD)
         return header, kreport_results

-    def make_lingroup_results(self, LINgroupsD): # LingroupsD is dictionary {lg_prefix: lg_name}
+    def make_lingroup_results(
+        self, LINgroupsD
+    ):  # LingroupsD is dictionary {lg_prefix: lg_name}
         """
         Report results for the specified LINGroups.
         Keep LCA paths in order as much as possible.
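(Here `LINgroupsD` maps a LIN prefix, as produced by `display_lineage(truncate_empty=True)`, to its display name. A minimal sketch of the expected shape, with hypothetical prefixes and names:

    # hypothetical {lg_prefix: lg_name} mapping; real prefixes come from a
    # lingroup CSV and are semicolon-separated LIN positions
    LINgroupsD = {
        "0;0;0": "lg_A",
        "0;0;0;1": "lg_A_sub1",
        "1;0;0": "lg_B",
    }
)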
@@ -2233,7 +2573,9 @@ def make_lingroup_results(self, LINgroupsD): # LingroupsD is dictionary {lg_pref header = ["name", "lin", "percent_containment", "num_bp_contained"] if self.query_info.total_weighted_hashes == 0: - raise ValueError("ERROR: cannot produce 'lingroup' format from gather results before sourmash v4.5.0") + raise ValueError( + "ERROR: cannot produce 'lingroup' format from gather results before sourmash v4.5.0" + ) # find the ranks we need to consider lg_ranks, all_lgs = parse_lingroups(LINgroupsD) @@ -2243,17 +2585,19 @@ def make_lingroup_results(self, LINgroupsD): # LingroupsD is dictionary {lg_pref for rank in lg_ranks: rank_results = self.summarized_lineage_results[rank] for res in rank_results: - if res.lineage in all_lgs:# is this lineage in the list of LINgroups? - this_lingroup_name = LINgroupsD[res.lineage.display_lineage(truncate_empty=True)] + if res.lineage in all_lgs: # is this lineage in the list of LINgroups? + this_lingroup_name = LINgroupsD[ + res.lineage.display_lineage(truncate_empty=True) + ] lg_resD = res.as_lingroup_dict(self.query_info, this_lingroup_name) lg_results[res.lineage] = lg_resD # We want to return in ~ depth order: descending each specific path in order # use LineageTree to find ordered paths lg_tree = LineageTree(all_lgs) - ordered_paths = lg_tree.ordered_paths(include_internal = True) + ordered_paths = lg_tree.ordered_paths(include_internal=True) # store results in order: - lingroup_results=[] + lingroup_results = [] for lg in ordered_paths: # get LINInfo object lg_LINInfo = LINLineageInfo(lineage=lg) @@ -2261,9 +2605,9 @@ def make_lingroup_results(self, LINgroupsD): # LingroupsD is dictionary {lg_pref lg_res = lg_results.get(lg_LINInfo) if lg_res: lingroup_results.append(lg_res) - + return header, lingroup_results - + def make_cami_bioboxes(self): """ info: https://github.com/CAMI-challenge/contest_information/blob/master/file_formats/CAMI_TP_specification.mkd @@ -2271,17 +2615,17 @@ def make_cami_bioboxes(self): columns: TAXID - specifies a unique alphanumeric ID for a node in a reference tree such as the NCBI taxonomy RANK - superkingdom --> strain - TAXPATH - the path from the root of the reference taxonomy to the respective taxon + TAXPATH - the path from the root of the reference taxonomy to the respective taxon TAXPATHSN - scientific names of taxpath PERCENTAGE (0-100) - field specifies what percentage of the sample was assigned to the respective TAXID example: - + #CAMI Submission for Taxonomic Profiling @Version:0.9.1 @SampleID:SAMPLEID @Ranks:superkingdom|phylum|class|order|family|genus|species|strain - + @@TAXID RANK TAXPATH TAXPATHSN PERCENTAGE 2 superkingdom 2 Bacteria 98.81211 2157 superkingdom 2157 Archaea 1.18789 @@ -2296,7 +2640,7 @@ def make_cami_bioboxes(self): 204455 order 2|1224|28211|204455 Bacteria|Proteobacteria|Alphaproteobacteria|Rhodobacterales 8.42263 2158 order 2157|28890|183925|2158 Archaea|Euryarchaeotes|Methanobacteria|Methanobacteriales 1.18789 """ - # build CAMI header info + # build CAMI header info header_title = "# Taxonomic Profiling Output" version_info = "@Version:0.10.0" program = "@__program__:sourmash" @@ -2308,9 +2652,9 @@ def make_cami_bioboxes(self): rank_info = f"@Ranks:{'|'.join(ranks)}" header_lines = [header_title, sample_info, version_info, rank_info, program] - colnames = ["@@TAXID","RANK","TAXPATH","TAXPATHSN","PERCENTAGE"] - header_lines.append('\t'.join(colnames)) - + colnames = ["@@TAXID", "RANK", "TAXPATH", "TAXPATHSN", "PERCENTAGE"] + header_lines.append("\t".join(colnames)) + # 
now build results in CAMI format bioboxes_results = [] # order results by rank (descending), then percentage @@ -2322,4 +2666,3 @@ def make_cami_bioboxes(self): bioboxes_results.append(bb_info) return header_lines, bioboxes_results - diff --git a/src/sourmash/utils.py b/src/sourmash/utils.py index 71afc20261..1910504e05 100644 --- a/src/sourmash/utils.py +++ b/src/sourmash/utils.py @@ -42,7 +42,7 @@ def decode_str(s): """Decodes a SourmashStr""" try: if s.len == 0: - return u"" + return "" return ffi.unpack(s.data, s.len).decode("utf-8", "replace") finally: if s.owned: diff --git a/tests/conftest.py b/tests/conftest.py index 3281133cd5..9cc035bb4a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,11 +5,14 @@ import pytest import matplotlib.pyplot as plt -plt.rcParams.update({'figure.max_open_warning': 0}) + +plt.rcParams.update({"figure.max_open_warning": 0}) from sourmash_tst_utils import TempDirectory, RunnerContext + sys.stdout = sys.stderr + @pytest.fixture def runtmp(): with TempDirectory() as location: @@ -66,16 +69,17 @@ def use_manifest(request): return request.param -@pytest.fixture(params=['json', 'sql']) +@pytest.fixture(params=["json", "sql"]) def lca_db_format(request): return request.param -@pytest.fixture(params=['csv', 'sql']) +@pytest.fixture(params=["csv", "sql"]) def manifest_db_format(request): return request.param -@pytest.fixture(params=['sig', 'sig.gz', 'zip', '.d/', '.sqldb']) + +@pytest.fixture(params=["sig", "sig.gz", "zip", ".d/", ".sqldb"]) def sig_save_extension(request): return request.param @@ -89,29 +93,37 @@ def pytest_collection_modifyitems(items, config): deselected_items = [] for item in items: - if fixture_name in getattr(item, 'fixturenames', ()): + if fixture_name in getattr(item, "fixturenames", ()): selected_items.append(item) else: deselected_items.append(item) config.hook.pytest_deselected(items=deselected_items) items[:] = selected_items + + # --- END - Only run tests using a particular fixture --- # + def pytest_addoption(parser): - parser.addoption("--usesfixture", - action="store", - default=None, - help="just run tests that use a particular fixture") + parser.addoption( + "--usesfixture", + action="store", + default=None, + help="just run tests that use a particular fixture", + ) + + parser.addoption( + "--run-hypothesis", action="store_true", help="run hypothesis tests" + ) - parser.addoption("--run-hypothesis", action="store_true", - help="run hypothesis tests") def pytest_runtest_setup(item): if item.config.getoption("--run-hypothesis"): if not any(mark for mark in item.iter_markers(name="hypothesis")): pytest.skip("--run-hypothesis option set, running only hypothesis tests") + settings.register_profile("ci", max_examples=1000) settings.register_profile("dev", max_examples=10) settings.register_profile("debug", max_examples=10, verbosity=Verbosity.verbose) -settings.load_profile(os.getenv(u'HYPOTHESIS_PROFILE', 'default')) +settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "default")) diff --git a/tests/sourmash_tst_utils.py b/tests/sourmash_tst_utils.py index 7425934d2a..a2a35cb2e6 100644 --- a/tests/sourmash_tst_utils.py +++ b/tests/sourmash_tst_utils.py @@ -7,7 +7,6 @@ import collections import pprint import traceback -from io import open # pylint: disable=redefined-builtin from io import StringIO from pathlib import Path @@ -15,13 +14,21 @@ from importlib.metadata import entry_points -SIG_FILES = [os.path.join('demo', f) for f in ( - "SRR2060939_1.sig", "SRR2060939_2.sig", "SRR2241509_1.sig", - "SRR2255622_1.sig", 
"SRR453566_1.sig", "SRR453569_1.sig", "SRR453570_1.sig") +SIG_FILES = [ + os.path.join("demo", f) + for f in ( + "SRR2060939_1.sig", + "SRR2060939_2.sig", + "SRR2241509_1.sig", + "SRR2255622_1.sig", + "SRR453566_1.sig", + "SRR453569_1.sig", + "SRR453570_1.sig", + ) ] -def scriptpath(scriptname='sourmash'): +def scriptpath(scriptname="sourmash"): """Return the path to the scripts, in both dev and install situations.""" # note - it doesn't matter what the scriptname is here, as long as # it's some script present in this version of sourmash. @@ -34,7 +41,7 @@ def scriptpath(scriptname='sourmash'): if os.path.exists(os.path.join(path, scriptname)): return path - for path in os.environ['PATH'].split(':'): + for path in os.environ["PATH"].split(":"): if os.path.exists(os.path.join(path, scriptname)): return path @@ -42,7 +49,7 @@ def scriptpath(scriptname='sourmash'): def _runscript(scriptname): """Find & run a script with exec (i.e. not via os.system or subprocess).""" namespace = {"__name__": "__main__"} - namespace['sys'] = globals()['sys'] + namespace["sys"] = globals()["sys"] try: (script,) = entry_points(name=scriptname, group="console_scripts") @@ -57,15 +64,15 @@ def _runscript(scriptname): if os.path.isfile(scriptfile): if os.path.isfile(scriptfile): exec( # pylint: disable=exec-used - compile(Path(scriptfile).read_text(), scriptfile, 'exec'), - namespace) + compile(Path(scriptfile).read_text(), scriptfile, "exec"), namespace + ) return 0 return -1 -ScriptResults = collections.namedtuple('ScriptResults', - ['status', 'out', 'err']) +ScriptResults = collections.namedtuple("ScriptResults", ["status", "out", "err"]) + def runscript(scriptname, args, **kwargs): """Run a Python script using exec(). @@ -81,8 +88,8 @@ def runscript(scriptname, args, **kwargs): sysargs.extend(args) cwd = os.getcwd() - in_directory = kwargs.get('in_directory', cwd) - fail_ok = kwargs.get('fail_ok', False) + in_directory = kwargs.get("in_directory", cwd) + fail_ok = kwargs.get("fail_ok", False) try: status = -1 @@ -90,8 +97,8 @@ def runscript(scriptname, args, **kwargs): sys.argv = sysargs oldin = None - if 'stdin_data' in kwargs: - oldin, sys.stdin = sys.stdin, StringIO(kwargs['stdin_data']) + if "stdin_data" in kwargs: + oldin, sys.stdin = sys.stdin, StringIO(kwargs["stdin_data"]) oldout, olderr = sys.stdout, sys.stderr sys.stdout = StringIO() @@ -101,13 +108,13 @@ def runscript(scriptname, args, **kwargs): os.chdir(in_directory) try: - print('running:', scriptname, 'in:', in_directory, file=oldout) - print('arguments', sysargs, file=oldout) + print("running:", scriptname, "in:", in_directory, file=oldout) + print("arguments", sysargs, file=oldout) status = _runscript(scriptname) except SystemExit as err: status = err.code - if status == None: + if status is None: status = 0 except: # pylint: disable=bare-except traceback.print_exc(file=sys.stderr) @@ -133,14 +140,13 @@ def runscript(scriptname, args, **kwargs): def get_test_data(filename): filepath = resources.files("sourmash") / "tests" / "test-data" / filename if not filepath.exists() or not os.path.isfile(filepath): - filepath = os.path.join(os.path.dirname(__file__), 'test-data', - filename) + filepath = os.path.join(os.path.dirname(__file__), "test-data", filename) return filepath -class TempDirectory(object): +class TempDirectory: def __init__(self): - self.tempdir = tempfile.mkdtemp(prefix='sourmashtest_') + self.tempdir = tempfile.mkdtemp(prefix="sourmashtest_") def __enter__(self): return self.tempdir @@ -158,10 +164,10 @@ def __exit__(self, exc_type, 
exc_value, traceback): class SourmashCommandFailed(Exception): def __init__(self, msg): Exception.__init__(self, msg) - self.message = msg + self.message = msg -class RunnerContext(object): +class RunnerContext: """ I am a RunnerContext object from sourmash_tst_utils. @@ -171,6 +177,7 @@ class RunnerContext(object): You can use the 'output' method to build filenames in my temp directory. """ + def __init__(self, location): self.location = location self.last_command = None @@ -178,25 +185,26 @@ def __init__(self, location): def run_sourmash(self, *args, **kwargs): "Run the sourmash script with the given arguments." - kwargs['fail_ok'] = True - if 'in_directory' not in kwargs: - kwargs['in_directory'] = self.location + kwargs["fail_ok"] = True + if "in_directory" not in kwargs: + kwargs["in_directory"] = self.location - cmdlist = ['sourmash'] - cmdlist.extend(( str(x) for x in args)) + cmdlist = ["sourmash"] + cmdlist.extend(str(x) for x in args) self.last_command = " ".join(cmdlist) - self.last_result = runscript('sourmash', args, **kwargs) + self.last_result = runscript("sourmash", args, **kwargs) if self.last_result.status: raise SourmashCommandFailed(self.last_result.err) return self.last_result + sourmash = run_sourmash def run(self, scriptname, *args, **kwargs): "Run a script with the given arguments." - if 'in_directory' not in kwargs: - kwargs['in_directory'] = self.location + if "in_directory" not in kwargs: + kwargs["in_directory"] = self.location self.last_command = " ".join(args) self.last_result = runscript(scriptname, args, **kwargs) return self.last_result @@ -207,18 +215,18 @@ def output(self, path): def __str__(self): s = "" if self.last_command: - s += "Last command run:\n{}\n".format(repr(self.last_command)) + s += f"Last command run:\n{repr(self.last_command)}\n" if self.last_result: s += "\nLAST RESULT:\n" - s += "- exit code: {}\n\n".format(self.last_result.status) + s += f"- exit code: {self.last_result.status}\n\n" if self.last_result.out: - s += "- stdout:\n---\n{}---\n".format(self.last_result.out) + s += f"- stdout:\n---\n{self.last_result.out}---\n" else: - s += '(no stdout)\n\n' + s += "(no stdout)\n\n" if self.last_result.err: - s += "- stderr:\n---\n{}---\n".format(self.last_result.err) + s += f"- stderr:\n---\n{self.last_result.err}---\n" else: - s += '(no stderr)\n' + s += "(no stderr)\n" return s diff --git a/tests/test__minhash_hypothesis.py b/tests/test__minhash_hypothesis.py index 7f1b421dbd..2778358caa 100644 --- a/tests/test__minhash_hypothesis.py +++ b/tests/test__minhash_hypothesis.py @@ -7,9 +7,11 @@ from sourmash.minhash import _get_max_hash_for_scaled -@given(st.lists(st.integers(min_value=0, max_value=2**64 - 1), min_size=10, max_size=1000), - st.lists(st.integers(min_value=0, max_value=2**64 - 1), min_size=10, max_size=1000), - st.integers(min_value=10, max_value=1000)) +@given( + st.lists(st.integers(min_value=0, max_value=2**64 - 1), min_size=10, max_size=1000), + st.lists(st.integers(min_value=0, max_value=2**64 - 1), min_size=10, max_size=1000), + st.integers(min_value=10, max_value=1000), +) @example([1, 2], [3, 4], 2) def test_set_abundance_num_hypothesis(hashes, abundances, sketch_size): a = MinHash(sketch_size, 10, track_abundance=True) @@ -25,9 +27,11 @@ def test_set_abundance_num_hypothesis(hashes, abundances, sketch_size): assert oracle[k] == v -@given(st.lists(st.integers(min_value=0, max_value=2**64 - 1), min_size=10, max_size=1000), - st.lists(st.integers(min_value=0, max_value=2**64 - 1), min_size=10, max_size=1000), - 
st.integers(min_value=1000, max_value=10000)) +@given( + st.lists(st.integers(min_value=0, max_value=2**64 - 1), min_size=10, max_size=1000), + st.lists(st.integers(min_value=0, max_value=2**64 - 1), min_size=10, max_size=1000), + st.integers(min_value=1000, max_value=10000), +) @example([0], [0], 1000) def test_set_abundance_scaled_hypothesis(hashes, abundances, scaled): a = MinHash(0, 10, track_abundance=True, scaled=scaled) diff --git a/tests/test_api.py b/tests/test_api.py index ccaf321df6..a06a610c83 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -9,10 +9,10 @@ def test_sourmash_signature_api(c): e = sourmash.MinHash(n=1, ksize=20) sig = sourmash.SourmashSignature(e) - with open(c.output('xxx.sig'), 'wt') as fp: + with open(c.output("xxx.sig"), "w") as fp: sourmash.save_signatures([sig], fp) - sig_x1 = sourmash.load_one_signature(c.output('xxx.sig')) - sig_x2 = list(sourmash.load_file_as_signatures(c.output('xxx.sig')))[0] + sig_x1 = sourmash.load_one_signature(c.output("xxx.sig")) + sig_x2 = list(sourmash.load_file_as_signatures(c.output("xxx.sig")))[0] assert sig_x1 == sig assert sig_x2 == sig @@ -21,12 +21,12 @@ def test_sourmash_signature_api(c): @utils.in_tempdir def test_load_index_0_no_file(c): with pytest.raises(ValueError) as exc: - idx = sourmash.load_file_as_index(c.output('does-not-exist')) - assert 'Error while reading signatures from ' in str(exc.value) + sourmash.load_file_as_index(c.output("does-not-exist")) + assert "Error while reading signatures from " in str(exc.value) def test_load_index_1(): - testfile = utils.get_test_data('prot/protein.sbt.zip') + testfile = utils.get_test_data("prot/protein.sbt.zip") idx = sourmash.load_file_as_index(testfile) sigs = list(idx.signatures()) @@ -34,7 +34,7 @@ def test_load_index_1(): def test_load_index_2(): - testfile = utils.get_test_data('prot/protein.lca.json.gz') + testfile = utils.get_test_data("prot/protein.lca.json.gz") idx = sourmash.load_file_as_index(testfile) sigs = list(idx.signatures()) @@ -42,7 +42,7 @@ def test_load_index_2(): def test_load_index_3(): - testfile = utils.get_test_data('prot/protein/') + testfile = utils.get_test_data("prot/protein/") idx = sourmash.load_file_as_index(testfile) sigs = list(idx.signatures()) @@ -50,7 +50,7 @@ def test_load_index_3(): def test_load_index_4(): - testfile = utils.get_test_data('prot/all.zip') + testfile = utils.get_test_data("prot/all.zip") idx = sourmash.load_file_as_index(testfile) sigs = list(idx.signatures()) @@ -58,7 +58,7 @@ def test_load_index_4(): def test_load_index_4_b(): - testfile = utils.get_test_data('prot/protein.zip') + testfile = utils.get_test_data("prot/protein.zip") idx = sourmash.load_file_as_index(testfile) sigs = list(idx.signatures()) @@ -67,19 +67,24 @@ def test_load_index_4_b(): def test_load_fasta_as_signature(): # try loading a fasta file - should fail with informative exception - testfile = utils.get_test_data('short.fa') + testfile = utils.get_test_data("short.fa") with pytest.raises(Exception) as exc: - idx = sourmash.load_file_as_index(testfile) + sourmash.load_file_as_index(testfile) print(exc.value) - assert f"Error while reading signatures from '{testfile}' - got sequences instead! Is this a FASTA/FASTQ file?" in str(exc.value) + assert ( + f"Error while reading signatures from '{testfile}' - got sequences instead! Is this a FASTA/FASTQ file?" 
+ in str(exc.value) + ) def test_load_and_search_sbt_api(): - treefile = utils.get_test_data('prot/protein.sbt.zip') - queryfile = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') + treefile = utils.get_test_data("prot/protein.sbt.zip") + queryfile = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) tree = sourmash.load_sbt_index(treefile) query = sourmash.load_one_signature(queryfile) diff --git a/tests/test_bugs.py b/tests/test_bugs.py index e0f3c5daf0..2b8f677279 100644 --- a/tests/test_bugs.py +++ b/tests/test_bugs.py @@ -1,11 +1,12 @@ import sourmash_tst_utils as utils + @utils.in_tempdir def test_bug_803(c): # can we do a 'sourmash search' on an LCA database and a query with abundance? - query = utils.get_test_data('track_abund/47.fa.sig') - lca_db = utils.get_test_data('lca/47+63.lca.json') + query = utils.get_test_data("track_abund/47.fa.sig") + lca_db = utils.get_test_data("lca/47+63.lca.json") - c.run_sourmash('search', query, lca_db, '--ignore-abundance') + c.run_sourmash("search", query, lca_db, "--ignore-abundance") print(c) - assert 'NC_009665.1 Shewanella baltica OS185, complete genome' in str(c) + assert "NC_009665.1 Shewanella baltica OS185, complete genome" in str(c) diff --git a/tests/test_cmd_signature.py b/tests/test_cmd_signature.py index 232ba6a218..7f8365118f 100644 --- a/tests/test_cmd_signature.py +++ b/tests/test_cmd_signature.py @@ -26,31 +26,33 @@ def _write_file(runtmp, basename, lines, *, gz=False): else: xopen = open - with xopen(loc, 'wt') as fp: + with xopen(loc, "wt") as fp: fp.write("\n".join(lines)) return loc def test_run_sourmash_signature_cmd(): - status, out, err = utils.runscript('sourmash', ['signature'], fail_ok=True) - assert not 'sourmash: error: argument cmd: invalid choice:' in err - assert 'Manipulate signature files:' in out - assert status != 0 # no args provided, ok ;) + status, out, err = utils.runscript("sourmash", ["signature"], fail_ok=True) + assert "sourmash: error: argument cmd: invalid choice:" not in err + assert "Manipulate signature files:" in out + assert status != 0 # no args provided, ok ;) def test_run_sourmash_sig_cmd(): - status, out, err = utils.runscript('sourmash', ['sig'], fail_ok=True) - assert not 'sourmash: error: argument cmd: invalid choice:' in err - assert 'Manipulate signature files:' in out - assert status != 0 # no args provided, ok ;) + status, out, err = utils.runscript("sourmash", ["sig"], fail_ok=True) + assert "sourmash: error: argument cmd: invalid choice:" not in err + assert "Manipulate signature files:" in out + assert status != 0 # no args provided, ok ;) def test_run_cat_via_parse_args(): # run a command ('sourmash.sig.cat') with args constructed via parse_args - import sourmash.sig, sourmash.cli - sig47 = utils.get_test_data('47.fa.sig') + import sourmash.sig + import sourmash.cli - args = sourmash.cli.parse_args(['sig', 'cat', sig47]) + sig47 = utils.get_test_data("47.fa.sig") + + args = sourmash.cli.parse_args(["sig", "cat", sig47]) sourmash.sig.cat(args) @@ -58,10 +60,10 @@ def test_sig_merge_1_use_full_signature_in_cmd(runtmp): c = runtmp # merge of 47 & 63 should be union of mins - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - sig47and63 = utils.get_test_data('47+63.fa.sig') - c.run_sourmash('signature', 'merge', sig47, sig63) + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + sig47and63 = utils.get_test_data("47+63.fa.sig") + 
c.run_sourmash("signature", "merge", sig47, sig63) # stdout should be new signature out = c.last_result.out @@ -80,16 +82,21 @@ def test_sig_merge_1_fromfile_picklist(runtmp): c = runtmp # merge of 47 & 63 should be union of mins - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - sig47and63 = utils.get_test_data('47+63.fa.sig') - - from_file = _write_file(runtmp, 'list.txt', [sig47, sig63]) - picklist = _write_file(runtmp, 'pl.csv', - ['md5short', '09a08691', '38729c63']) - - c.run_sourmash('signature', 'merge', '--from-file', from_file, - '--picklist', f'{picklist}:md5short:md5short') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + sig47and63 = utils.get_test_data("47+63.fa.sig") + + from_file = _write_file(runtmp, "list.txt", [sig47, sig63]) + picklist = _write_file(runtmp, "pl.csv", ["md5short", "09a08691", "38729c63"]) + + c.run_sourmash( + "signature", + "merge", + "--from-file", + from_file, + "--picklist", + f"{picklist}:md5short:md5short", + ) # stdout should be new signature out = c.last_result.out @@ -109,17 +116,23 @@ def test_sig_merge_1_fromfile_picklist_gz(runtmp): c = runtmp # merge of 47 & 63 should be union of mins - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - sig47and63 = utils.get_test_data('47+63.fa.sig') - - from_file = _write_file(runtmp, 'list.txt', [sig47, sig63]) - picklist = _write_file(runtmp, 'pl.csv', - ['md5short', '09a08691', '38729c63'], - gz=True) - - c.run_sourmash('signature', 'merge', '--from-file', from_file, - '--picklist', f'{picklist}:md5short:md5short') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + sig47and63 = utils.get_test_data("47+63.fa.sig") + + from_file = _write_file(runtmp, "list.txt", [sig47, sig63]) + picklist = _write_file( + runtmp, "pl.csv", ["md5short", "09a08691", "38729c63"], gz=True + ) + + c.run_sourmash( + "signature", + "merge", + "--from-file", + from_file, + "--picklist", + f"{picklist}:md5short:md5short", + ) # stdout should be new signature out = c.last_result.out @@ -137,10 +150,10 @@ def test_sig_merge_1_fromfile_picklist_gz(runtmp): @utils.in_tempdir def test_sig_merge_1(c): # merge of 47 & 63 should be union of mins - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - sig47and63 = utils.get_test_data('47+63.fa.sig') - c.run_sourmash('sig', 'merge', sig47, sig63) + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + sig47and63 = utils.get_test_data("47+63.fa.sig") + c.run_sourmash("sig", "merge", sig47, sig63) # stdout should be new signature out = c.last_result.out @@ -158,9 +171,9 @@ def test_sig_merge_1(c): @utils.in_tempdir def test_sig_merge_1_multisig(c): # merge of 47 & 63 should be union of mins; here, sigs are in same file. 
- multisig = utils.get_test_data('47+63-multisig.sig') - sig47and63 = utils.get_test_data('47+63.fa.sig') - c.run_sourmash('sig', 'merge', multisig, '--flatten') + multisig = utils.get_test_data("47+63-multisig.sig") + sig47and63 = utils.get_test_data("47+63.fa.sig") + c.run_sourmash("sig", "merge", multisig, "--flatten") # stdout should be new signature out = c.last_result.out @@ -178,13 +191,25 @@ def test_sig_merge_1_multisig(c): @utils.in_tempdir def test_sig_merge_1_name(c): # check name arg - sig2 = utils.get_test_data('2.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - - assignedSigName = 'SIG_NAME' - outsig = c.output('merged2and63.sig') - - c.run_sourmash('sig', 'merge', sig2, sig63, '--dna', '-k', '31', '-o', "merged2and63.sig", '--name', assignedSigName ) + sig2 = utils.get_test_data("2.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + + assignedSigName = "SIG_NAME" + outsig = c.output("merged2and63.sig") + + c.run_sourmash( + "sig", + "merge", + sig2, + sig63, + "--dna", + "-k", + "31", + "-o", + "merged2and63.sig", + "--name", + assignedSigName, + ) test_merge_sig = sourmash.load_one_signature(outsig) @@ -197,10 +222,10 @@ def test_sig_merge_1_name(c): @utils.in_tempdir def test_sig_merge_1_ksize_moltype(c): # check ksize, moltype args - sig2 = utils.get_test_data('2.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - sig2and63 = utils.get_test_data('2+63.fa.sig') - c.run_sourmash('sig', 'merge', sig2, sig63, '--dna', '-k', '31') + sig2 = utils.get_test_data("2.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + sig2and63 = utils.get_test_data("2+63.fa.sig") + c.run_sourmash("sig", "merge", sig2, sig63, "--dna", "-k", "31") # stdout should be new signature out = c.last_result.out @@ -218,12 +243,12 @@ def test_sig_merge_1_ksize_moltype(c): @utils.in_tempdir def test_sig_merge_1_ksize_moltype_fail(c): # check ksize, moltype args - sig2 = utils.get_test_data('2.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - sig2and63 = utils.get_test_data('2+63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + utils.get_test_data("2+63.fa.sig") with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('sig', 'merge', sig2, sig63) + c.run_sourmash("sig", "merge", sig2, sig63) assert "ERROR when merging signature" in str(exc.value) @@ -231,8 +256,8 @@ def test_sig_merge_1_ksize_moltype_fail(c): @utils.in_tempdir def test_sig_merge_2(c): # merge of 47 with nothing should be 47 - sig47 = utils.get_test_data('47.fa.sig') - c.run_sourmash('sig', 'merge', sig47) + sig47 = utils.get_test_data("47.fa.sig") + c.run_sourmash("sig", "merge", sig47) # stdout should be new signature out = c.last_result.out @@ -248,46 +273,52 @@ def test_sig_merge_2(c): @utils.in_tempdir def test_sig_merge_3_abund_ab_ok(c): # merge of 47 and 63 with abund should work - sig47abund = utils.get_test_data('track_abund/47.fa.sig') - sig63abund = utils.get_test_data('track_abund/63.fa.sig') + sig47abund = utils.get_test_data("track_abund/47.fa.sig") + sig63abund = utils.get_test_data("track_abund/63.fa.sig") - c.run_sourmash('sig', 'merge', sig47abund, sig63abund) - actual_merge_sig = sourmash.load_one_signature(c.last_result.out) + c.run_sourmash("sig", "merge", sig47abund, sig63abund) + sourmash.load_one_signature(c.last_result.out) # CTB: should check that this merge did what we think it should do! 
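(The two tests that follow pin down `sig merge`'s abundance-compatibility rule: merging an abundance-tracking signature with a flat one fails in either order, unless `--flatten` is given. A minimal sketch of that rule, assuming `sig_a`/`sig_b` are loaded SourmashSignature objects; `can_merge` is illustrative, not sourmash API:

    # illustrative restatement of the check exercised by the tests below;
    # MinHash.track_abundance is real sourmash API, can_merge() is hypothetical
    def can_merge(sig_a, sig_b, flatten=False):
        if flatten:
            return True  # --flatten drops abundances, so any pair merges
        return sig_a.minhash.track_abundance == sig_b.minhash.track_abundance
)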
@utils.in_tempdir def test_sig_merge_3_abund_ab(c): # merge of 47 with abund, with 63 without, should fail; and vice versa - sig47 = utils.get_test_data('47.fa.sig') - sig63abund = utils.get_test_data('track_abund/63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63abund = utils.get_test_data("track_abund/63.fa.sig") - with pytest.raises(SourmashCommandFailed) as e: - c.run_sourmash('sig', 'merge', sig47, sig63abund) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("sig", "merge", sig47, sig63abund) print(c.last_result) - assert 'incompatible signatures: track_abundance is False in first sig, True in second' in c.last_result.err + assert ( + "incompatible signatures: track_abundance is False in first sig, True in second" + in c.last_result.err + ) @utils.in_tempdir def test_sig_merge_3_abund_ba(c): # merge of 47 without abund, with 63 with, should fail - sig47 = utils.get_test_data('47.fa.sig') - sig63abund = utils.get_test_data('track_abund/63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63abund = utils.get_test_data("track_abund/63.fa.sig") - with pytest.raises(SourmashCommandFailed) as e: - c.run_sourmash('sig', 'merge', sig63abund, sig47) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("sig", "merge", sig63abund, sig47) print(c.last_result) - assert 'incompatible signatures: track_abundance is True in first sig, False in second' in c.last_result.err + assert ( + "incompatible signatures: track_abundance is True in first sig, False in second" + in c.last_result.err + ) @utils.in_tempdir def test_sig_filter_1(c): # test basic filtering - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') - c.run_sourmash('sig', 'filter', sig47, sig63) + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") + c.run_sourmash("sig", "filter", sig47, sig63) # stdout should be new signature out = c.last_result.out @@ -307,8 +338,8 @@ def test_sig_filter_1(c): @utils.in_tempdir def test_sig_filter_2(c): # test basic filtering - sig47 = utils.get_test_data('track_abund/47.fa.sig') - c.run_sourmash('sig', 'filter', '-m', '2', '-M', '5', sig47) + sig47 = utils.get_test_data("track_abund/47.fa.sig") + c.run_sourmash("sig", "filter", "-m", "2", "-M", "5", sig47) # stdout should be new signature out = c.last_result.out @@ -317,7 +348,7 @@ def test_sig_filter_2(c): test_sig = sourmash.load_one_signature(sig47) abunds = test_sig.minhash.hashes - abunds = { k: v for (k, v) in abunds.items() if v >= 2 and v <= 5 } + abunds = {k: v for (k, v) in abunds.items() if v >= 2 and v <= 5} assert abunds assert filtered_sig.minhash.hashes == abunds @@ -326,8 +357,8 @@ def test_sig_filter_2(c): @utils.in_tempdir def test_sig_filter_3(c): # test basic filtering - sig47 = utils.get_test_data('track_abund/47.fa.sig') - c.run_sourmash('sig', 'filter', '-m', '2', sig47) + sig47 = utils.get_test_data("track_abund/47.fa.sig") + c.run_sourmash("sig", "filter", "-m", "2", sig47) # stdout should be new signature out = c.last_result.out @@ -336,7 +367,7 @@ def test_sig_filter_3(c): test_sig = sourmash.load_one_signature(sig47) abunds = test_sig.minhash.hashes - abunds = { k: v for (k, v) in abunds.items() if v >= 2 } + abunds = {k: v for (k, v) in abunds.items() if v >= 2} assert abunds assert filtered_sig.minhash.hashes == abunds @@ -345,8 +376,8 @@ def test_sig_filter_3(c): @utils.in_tempdir def test_sig_filter_3_ksize_select(c): # test filtering with ksize selectiong - psw_mag = 
utils.get_test_data('lca/TARA_PSW_MAG_00136.sig') - c.run_sourmash('sig', 'filter', '-m', '2', psw_mag, '-k', '31') + psw_mag = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig") + c.run_sourmash("sig", "filter", "-m", "2", psw_mag, "-k", "31") # stdout should be new signature out = c.last_result.out @@ -355,7 +386,7 @@ def test_sig_filter_3_ksize_select(c): test_sig = sourmash.load_one_signature(psw_mag, ksize=31) abunds = test_sig.minhash.hashes - abunds = { k: v for (k, v) in abunds.items() if v >= 2 } + abunds = {k: v for (k, v) in abunds.items() if v >= 2} assert abunds assert filtered_sig.minhash.hashes == abunds @@ -364,11 +395,11 @@ def test_sig_filter_3_ksize_select(c): @utils.in_tempdir def test_sig_merge_flatten(c): # merge of 47 without abund, with 63 with, will succeed with --flatten - sig47 = utils.get_test_data('47.fa.sig') - sig63abund = utils.get_test_data('track_abund/63.fa.sig') - sig47and63 = utils.get_test_data('47+63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63abund = utils.get_test_data("track_abund/63.fa.sig") + sig47and63 = utils.get_test_data("47+63.fa.sig") - c.run_sourmash('sig', 'merge', sig63abund, sig47, '--flatten') + c.run_sourmash("sig", "merge", sig63abund, sig47, "--flatten") print(c.last_result) out = c.last_result.out @@ -386,11 +417,11 @@ def test_sig_merge_flatten(c): @utils.in_tempdir def test_sig_merge_flatten_2(c): # merge of 47 with abund, with 63 with, will succeed with --flatten - sig47abund = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - sig47and63 = utils.get_test_data('47+63.fa.sig') + sig47abund = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + sig47and63 = utils.get_test_data("47+63.fa.sig") - c.run_sourmash('sig', 'merge', sig63, sig47abund, '--flatten') + c.run_sourmash("sig", "merge", sig63, sig47abund, "--flatten") print(c.last_result) out = c.last_result.out @@ -410,7 +441,7 @@ def test_sig_intersect_0(runtmp): c = runtmp with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'intersect') + c.run_sourmash("sig", "intersect") err = c.last_result.err assert "no signatures provided to intersect!?" 
in err @@ -420,10 +451,10 @@ def test_sig_intersect_1(runtmp): c = runtmp # intersect of 47 and 63 should be intersection of mins - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - sig47and63 = utils.get_test_data('47+63-intersect.fa.sig') - c.run_sourmash('sig', 'intersect', sig47, sig63) + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + sig47and63 = utils.get_test_data("47+63-intersect.fa.sig") + c.run_sourmash("sig", "intersect", sig47, sig63) # stdout should be new signature out = c.last_result.out @@ -442,16 +473,21 @@ def test_sig_intersect_1_fromfile_picklist(runtmp): c = runtmp # intersect of 47 and 63 should be intersection of mins - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - sig47and63 = utils.get_test_data('47+63-intersect.fa.sig') - - from_file = _write_file(runtmp, 'list.txt', [sig47, sig63]) - picklist = _write_file(runtmp, 'pl.csv', - ['md5short', '09a08691', '38729c63']) - - c.run_sourmash('signature', 'intersect', '--from-file', from_file, - '--picklist', f'{picklist}:md5short:md5short') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + sig47and63 = utils.get_test_data("47+63-intersect.fa.sig") + + from_file = _write_file(runtmp, "list.txt", [sig47, sig63]) + picklist = _write_file(runtmp, "pl.csv", ["md5short", "09a08691", "38729c63"]) + + c.run_sourmash( + "signature", + "intersect", + "--from-file", + from_file, + "--picklist", + f"{picklist}:md5short:md5short", + ) # stdout should be new signature out = c.last_result.out @@ -470,10 +506,10 @@ def test_sig_intersect_1_fromfile_picklist(runtmp): def test_sig_intersect_2(c): # intersect of 47 with abund and 63 with abund should be same # as without abund, i.e. 
intersect 'flattens' - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') - sig47and63 = utils.get_test_data('47+63-intersect.fa.sig') - c.run_sourmash('sig', 'intersect', sig47, sig63) + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") + sig47and63 = utils.get_test_data("47+63-intersect.fa.sig") + c.run_sourmash("sig", "intersect", sig47, sig63) # stdout should be new signature out = c.last_result.out @@ -491,9 +527,9 @@ def test_sig_intersect_2(c): @utils.in_tempdir def test_sig_intersect_3(c): # use --abundances-from to preserve abundances from sig #47 - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') - c.run_sourmash('sig', 'intersect', '--abundances-from', sig47, sig63) + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") + c.run_sourmash("sig", "intersect", "--abundances-from", sig47, sig63) # stdout should be new signature out = c.last_result.out @@ -510,7 +546,7 @@ def test_sig_intersect_3(c): mh63_mins.intersection_update(mh47_abunds) # take abundances from mh47 & create new sig - mh47_abunds = { k: mh47_abunds[k] for k in mh63_mins } + mh47_abunds = {k: mh47_abunds[k] for k in mh63_mins} test_mh = mh47.copy_and_clear() test_mh.set_abundances(mh47_abunds) @@ -523,9 +559,9 @@ def test_sig_intersect_3(c): @utils.in_tempdir def test_sig_intersect_4(c): # use --abundances-from to preserve abundances from sig #47 - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') - c.run_sourmash('sig', 'intersect', '--abundances-from', sig47, sig63) + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") + c.run_sourmash("sig", "intersect", "--abundances-from", sig47, sig63) # stdout should be new signature out = c.last_result.out @@ -542,7 +578,7 @@ def test_sig_intersect_4(c): mh63_mins.intersection_update(mh47_abunds) # take abundances from mh47 & create new sig - mh47_abunds = { k: mh47_abunds[k] for k in mh63_mins } + mh47_abunds = {k: mh47_abunds[k] for k in mh63_mins} test_mh = mh47.copy_and_clear() test_mh.set_abundances(mh47_abunds) @@ -556,41 +592,41 @@ def test_sig_intersect_4(c): def test_sig_intersect_5(c): # use --abundances-from to preserve abundances from sig #47 # make sure that you can't specify a flat sig for --abundances-from - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'intersect', '--abundances-from', sig47, sig63) + c.run_sourmash("sig", "intersect", "--abundances-from", sig47, sig63) @utils.in_tempdir def test_sig_intersect_6_ksize_fail(c): # specify ksize to intersect 2.fa.sig with 47.fa.sig - 2.fa.sig contains # multiple ksizes. - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'intersect', sig2, sig47) + c.run_sourmash("sig", "intersect", sig2, sig47) @utils.in_tempdir def test_sig_intersect_6_ksize_succeed(c): # specify ksize to intersect 2.fa.sig with 47.fa.sig - 2.fa.sig contains # multiple ksizes. 
- sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") - c.run_sourmash('sig', 'intersect', '-k', '31', sig2, sig47) + c.run_sourmash("sig", "intersect", "-k", "31", sig2, sig47) - assert 'loaded and intersected 2 signatures' in c.last_result.err + assert "loaded and intersected 2 signatures" in c.last_result.err @utils.in_tempdir def test_sig_intersect_7(c): # intersect of 47 and nothing should be self - sig47 = utils.get_test_data('47.fa.sig') - c.run_sourmash('sig', 'intersect', sig47) + sig47 = utils.get_test_data("47.fa.sig") + c.run_sourmash("sig", "intersect", sig47) # stdout should be new signature out = c.last_result.out @@ -608,8 +644,8 @@ def test_sig_intersect_7(c): @utils.in_tempdir def test_sig_intersect_8_multisig(c): # intersect of all the multisig stuff should be nothing - sig47 = utils.get_test_data('47+63-multisig.sig') - c.run_sourmash('sig', 'intersect', sig47) + sig47 = utils.get_test_data("47+63-multisig.sig") + c.run_sourmash("sig", "intersect", sig47) # stdout should be new signature out = c.last_result.out @@ -621,9 +657,9 @@ def test_sig_intersect_8_multisig(c): def test_sig_inflate_1(runtmp): # basic inflate test - inflate 47 flat with 47 abund - sig47_flat = utils.get_test_data('47.fa.sig') - sig47_abund = utils.get_test_data('track_abund/47.fa.sig') - runtmp.run_sourmash('sig', 'inflate', sig47_abund, sig47_flat) + sig47_flat = utils.get_test_data("47.fa.sig") + sig47_abund = utils.get_test_data("track_abund/47.fa.sig") + runtmp.run_sourmash("sig", "inflate", sig47_abund, sig47_flat) # stdout should be new signature out = runtmp.last_result.out @@ -641,9 +677,9 @@ def test_sig_inflate_1(runtmp): def test_sig_inflate_2(runtmp): # use abundances from sig #47 - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - runtmp.run_sourmash('sig', 'inflate', sig47, sig63) + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + runtmp.run_sourmash("sig", "inflate", sig47, sig63) # stdout should be new signature out = runtmp.last_result.out @@ -660,7 +696,7 @@ def test_sig_inflate_2(runtmp): mh63_mins.intersection_update(mh47_abunds) # take abundances from mh47 & create new sig - mh47_abunds = { k: mh47_abunds[k] for k in mh63_mins } + mh47_abunds = {k: mh47_abunds[k] for k in mh63_mins} test_mh = mh47.copy_and_clear() test_mh.set_abundances(mh47_abunds) @@ -672,34 +708,33 @@ def test_sig_inflate_2(runtmp): def test_sig_inflate_3(runtmp): # should fail on flat first sig - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('sig', 'inflate', sig63, sig47) + with pytest.raises(SourmashCommandFailed): + runtmp.run_sourmash("sig", "inflate", sig63, sig47) - assert 'has no abundances' in runtmp.last_result.err + assert "has no abundances" in runtmp.last_result.err def test_sig_inflate_4_picklist(runtmp): # try out picklists - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - sig47_flat = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + sig47_flat = utils.get_test_data("47.fa.sig") ss63 = sourmash.load_one_signature(sig63, ksize=31) - 
picklist = _write_file(runtmp, 'pl.csv', ['md5', ss63.md5sum()]) + _write_file(runtmp, "pl.csv", ["md5", ss63.md5sum()]) print(ss63.md5sum()) - - runtmp.run_sourmash('sig', 'inflate', sig47, sig63, sig47_flat, - '--picklist', f'pl.csv:md5:md5') + runtmp.run_sourmash( + "sig", "inflate", sig47, sig63, sig47_flat, "--picklist", "pl.csv:md5:md5" + ) # stdout should be new signature out = runtmp.last_result.out - err = runtmp.last_result.err actual_inflate_sig = sourmash.load_one_signature(out) @@ -713,7 +748,7 @@ def test_sig_inflate_4_picklist(runtmp): mh63_mins.intersection_update(mh47_abunds) # take abundances from mh47 & create new sig - mh47_abunds = { k: mh47_abunds[k] for k in mh63_mins } + mh47_abunds = {k: mh47_abunds[k] for k in mh63_mins} test_mh = mh47.copy_and_clear() test_mh.set_abundances(mh47_abunds) @@ -725,21 +760,21 @@ def test_sig_inflate_4_picklist(runtmp): def test_sig_inflate_5_bad_moltype(runtmp): # should fail when no signatures match moltype - sig47 = utils.get_test_data('track_abund/47.fa.sig') - prot = utils.get_test_data('prot/protein.zip') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + prot = utils.get_test_data("prot/protein.zip") - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('sig', 'inflate', sig47, prot) + with pytest.raises(SourmashCommandFailed): + runtmp.run_sourmash("sig", "inflate", sig47, prot) - assert 'no signatures to inflate' in runtmp.last_result.err + assert "no signatures to inflate" in runtmp.last_result.err @utils.in_tempdir def test_sig_subtract_1(c): # subtract of 63 from 47 - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - c.run_sourmash('sig', 'subtract', sig47, sig63) + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + c.run_sourmash("sig", "subtract", sig47, sig63) # stdout should be new signature out = c.last_result.out @@ -758,9 +793,9 @@ def test_sig_subtract_1_abund(runtmp): # subtract 63 from 47, with abundances borrowed from 47 c = runtmp - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') - c.run_sourmash('sig', 'subtract', sig47, sig63, '-A', sig47) + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") + c.run_sourmash("sig", "subtract", sig47, sig63, "-A", sig47) # stdout should be new signature out = c.last_result.out @@ -791,21 +826,21 @@ def test_sig_subtract_1_abund_is_flat(runtmp): # subtract 63 from 47, with abundances borrowed from 47 c = runtmp - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') - sig47_flat = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") + sig47_flat = utils.get_test_data("47.fa.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'subtract', sig47, sig63, '-A', sig47_flat) + c.run_sourmash("sig", "subtract", sig47, sig63, "-A", sig47_flat) def test_sig_subtract_1_flatten(runtmp): # subtract 63 from 47, with abund signatures originally and --flatten c = runtmp - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') - c.run_sourmash('sig', 'subtract', sig47, sig63, '--flatten') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") + c.run_sourmash("sig", "subtract", sig47, sig63, "--flatten") # stdout 
should be new signature out = c.last_result.out @@ -824,9 +859,9 @@ def test_sig_subtract_1_flatten(runtmp): @utils.in_tempdir def test_sig_subtract_1_multisig(c): # subtract of everything from 47 - sig47 = utils.get_test_data('47.fa.sig') - multisig = utils.get_test_data('47+63-multisig.sig') - c.run_sourmash('sig', 'subtract', sig47, multisig, '--flatten') + sig47 = utils.get_test_data("47.fa.sig") + multisig = utils.get_test_data("47+63-multisig.sig") + c.run_sourmash("sig", "subtract", sig47, multisig, "--flatten") # stdout should be new signature out = c.last_result.out @@ -839,60 +874,60 @@ def test_sig_subtract_1_multisig(c): @utils.in_tempdir def test_sig_subtract_2(c): # subtract of 63 from 47 should fail if 47 has abund - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'subtract', sig47, sig63) + c.run_sourmash("sig", "subtract", sig47, sig63) @utils.in_tempdir def test_sig_subtract_3(c): # subtract of 63 from 47 should fail if 63 has abund - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'subtract', sig47, sig63) + c.run_sourmash("sig", "subtract", sig47, sig63) @utils.in_tempdir def test_sig_subtract_4_ksize_fail(c): # subtract of 2 from 47 should fail without -k specified - sig47 = utils.get_test_data('47.fa.sig') - sig2 = utils.get_test_data('2.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig2 = utils.get_test_data("2.fa.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'subtract', sig47, sig2) + c.run_sourmash("sig", "subtract", sig47, sig2) @utils.in_tempdir def test_sig_subtract_4_ksize_succeed(c): # subtract of 2 from 47 should fail without -k specified - sig47 = utils.get_test_data('47.fa.sig') - sig2 = utils.get_test_data('2.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig2 = utils.get_test_data("2.fa.sig") - c.run_sourmash('sig', 'subtract', sig47, sig2, '-k', '31') - assert 'loaded and subtracted 1 signatures' in c.last_result.err + c.run_sourmash("sig", "subtract", sig47, sig2, "-k", "31") + assert "loaded and subtracted 1 signatures" in c.last_result.err def test_sig_subtract_5_bad_moltype(runtmp): # should fail when no matching sigs - sig47 = utils.get_test_data('47.fa.sig') - prot = utils.get_test_data('prot/protein.zip') + sig47 = utils.get_test_data("47.fa.sig") + prot = utils.get_test_data("prot/protein.zip") - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('sig', 'subtract', '-k', '31', sig47, prot) + with pytest.raises(SourmashCommandFailed): + runtmp.run_sourmash("sig", "subtract", "-k", "31", sig47, prot) - assert 'no signatures to subtract' in runtmp.last_result.err + assert "no signatures to subtract" in runtmp.last_result.err def test_sig_rename_1(runtmp): c = runtmp # set new name for 47 - sig47 = utils.get_test_data('47.fa.sig') - c.run_sourmash('sig', 'rename', sig47, 'fiz bar') + sig47 = utils.get_test_data("47.fa.sig") + c.run_sourmash("sig", "rename", sig47, "fiz bar") # stdout should be new signature out = c.last_result.out @@ -905,20 +940,27 @@ def test_sig_rename_1(runtmp): assert actual_rename_sig.minhash == test_rename_sig.minhash assert test_rename_sig.name 
-    assert actual_rename_sig.name == 'fiz bar'
+    assert actual_rename_sig.name == "fiz bar"


 def test_sig_rename_1_fromfile_picklist(runtmp):
     c = runtmp
     # set new name for 47
-    sig47 = utils.get_test_data('47.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")

-    from_file = _write_file(runtmp, 'list.txt', [sig47])
-    picklist = _write_file(runtmp, 'pl.csv', ['md5short', '09a08691'])
+    from_file = _write_file(runtmp, "list.txt", [sig47])
+    picklist = _write_file(runtmp, "pl.csv", ["md5short", "09a08691"])

-    c.run_sourmash('sig', 'rename', '--from-file', from_file, 'fiz bar',
-                   '--picklist', f'{picklist}:md5short:md5short')
+    c.run_sourmash(
+        "sig",
+        "rename",
+        "--from-file",
+        from_file,
+        "fiz bar",
+        "--picklist",
+        f"{picklist}:md5short:md5short",
+    )

     # stdout should be new signature
     out = c.last_result.out

@@ -931,22 +973,22 @@ def test_sig_rename_1_fromfile_picklist(runtmp):
     assert actual_rename_sig.minhash == test_rename_sig.minhash
     assert test_rename_sig.name != actual_rename_sig.name
-    assert actual_rename_sig.name == 'fiz bar'
+    assert actual_rename_sig.name == "fiz bar"


 @utils.in_tempdir
 def test_sig_rename_1_multisig(c):
     # set new name for multiple signatures/files
-    multisig = utils.get_test_data('47+63-multisig.sig')
-    other_sig = utils.get_test_data('2.fa.sig')
-    c.run_sourmash('sig', 'rename', multisig, other_sig, 'fiz bar')
+    multisig = utils.get_test_data("47+63-multisig.sig")
+    other_sig = utils.get_test_data("2.fa.sig")
+    c.run_sourmash("sig", "rename", multisig, other_sig, "fiz bar")

     # stdout should be new signature
     out = c.last_result.out

     n = 0
     for sig in load_signatures(out):
-        assert sig.name == 'fiz bar'
+        assert sig.name == "fiz bar"
         n += 1

     assert n == 9, n

@@ -955,16 +997,16 @@ def test_sig_rename_1_multisig(c):
 @utils.in_tempdir
 def test_sig_rename_1_multisig_ksize(c):
     # set new name for multiple signatures/files; select k=31
-    multisig = utils.get_test_data('47+63-multisig.sig')
-    other_sig = utils.get_test_data('2.fa.sig')
-    c.run_sourmash('sig', 'rename', multisig, other_sig, 'fiz bar', '-k', '31')
+    multisig = utils.get_test_data("47+63-multisig.sig")
+    other_sig = utils.get_test_data("2.fa.sig")
+    c.run_sourmash("sig", "rename", multisig, other_sig, "fiz bar", "-k", "31")

     # stdout should be new signature
     out = c.last_result.out

     n = 0
     for sig in load_signatures(out):
-        assert sig.name == 'fiz bar'
+        assert sig.name == "fiz bar"
         n += 1

     assert n == 7, n

@@ -973,23 +1015,23 @@ def test_sig_rename_1_multisig_ksize(c):
 @utils.in_tempdir
 def test_sig_rename_2_output_to_same(c):
     # change name of signature "in place", same output file
-    sig47 = utils.get_test_data('47.fa.sig')
-    inplace = c.output('inplace.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    inplace = c.output("inplace.sig")
     shutil.copyfile(sig47, inplace)

     print(inplace)

-    c.run_sourmash('sig', 'rename', '-d', inplace, 'fiz bar', '-o', inplace)
+    c.run_sourmash("sig", "rename", "-d", inplace, "fiz bar", "-o", inplace)

     actual_rename_sig = sourmash.load_one_signature(inplace)
-    assert actual_rename_sig.name == 'fiz bar'
+    assert actual_rename_sig.name == "fiz bar"


 @utils.in_tempdir
 def test_sig_rename_3_file_dne(c):
     # rename on a file that does not exist should fail!
-    with pytest.raises(SourmashCommandFailed) as e:
-        c.run_sourmash('sig', 'rename', 'no-such-sig', 'fiz bar')
+    with pytest.raises(SourmashCommandFailed):
+        c.run_sourmash("sig", "rename", "no-such-sig", "fiz bar")

     assert "Error while reading signatures from 'no-such-sig'" in c.last_result.err

@@ -997,7 +1039,7 @@ def test_sig_rename_3_file_dne(c):
 @utils.in_tempdir
 def test_sig_rename_3_file_dne_force(c):
     # rename on a file that does not exist should succeed with -f (force)
-    c.run_sourmash('sig', 'rename', 'no-such-sig', 'fiz bar', '-f')
+    c.run_sourmash("sig", "rename", "no-such-sig", "fiz bar", "-f")

     print(c.last_result.err)
     assert "Error while reading signatures from 'no-such-sig'" in c.last_result.err

@@ -1005,35 +1047,37 @@ def test_sig_rename_3_file_dne_force(c):

 def test_sig_rename_4_pattern_include(runtmp):
     # test sig rename --include-db-pattern
-    sigfiles = glob.glob(utils.get_test_data('prot/*.zip'))
-    runtmp.sourmash('sig', 'rename', '--include', 'shewanella',
-                    *sigfiles, 'SHEWME', '-o', 'out.zip')
+    sigfiles = glob.glob(utils.get_test_data("prot/*.zip"))
+    runtmp.sourmash(
+        "sig", "rename", "--include", "shewanella", *sigfiles, "SHEWME", "-o", "out.zip"
+    )

-    idx = sourmash.load_file_as_index(runtmp.output('out.zip'))
-    names = [ ss.name for ss in idx.signatures() ]
+    idx = sourmash.load_file_as_index(runtmp.output("out.zip"))
+    names = [ss.name for ss in idx.signatures()]
     for n in names:
-        assert n == 'SHEWME'
+        assert n == "SHEWME"
     assert len(names) == 2


 def test_sig_rename_4_pattern_exclude(runtmp):
     # test sig rename --exclude-db-pattern
-    sigfiles = glob.glob(utils.get_test_data('prot/*.zip'))
-    runtmp.sourmash('sig', 'rename', '--exclude', 'shewanella',
-                    *sigfiles, 'NOSHEW', '-o', 'out.zip')
+    sigfiles = glob.glob(utils.get_test_data("prot/*.zip"))
+    runtmp.sourmash(
+        "sig", "rename", "--exclude", "shewanella", *sigfiles, "NOSHEW", "-o", "out.zip"
+    )

-    idx = sourmash.load_file_as_index(runtmp.output('out.zip'))
-    names = [ ss.name for ss in idx.signatures() ]
+    idx = sourmash.load_file_as_index(runtmp.output("out.zip"))
+    names = [ss.name for ss in idx.signatures()]
     for n in names:
-        assert n == 'NOSHEW'
+        assert n == "NOSHEW"
     assert len(names) == 6


 @utils.in_thisdir
 def test_sig_cat_1(c):
     # cat 47 to 47...
-    sig47 = utils.get_test_data('47.fa.sig')
-    c.run_sourmash('sig', 'cat', sig47)
+    sig47 = utils.get_test_data("47.fa.sig")
+    c.run_sourmash("sig", "cat", sig47)

     # stdout should be same signature
     out = c.last_result.out

@@ -1047,8 +1091,8 @@ def test_sig_cat_1(c):
 @utils.in_thisdir
 def test_sig_cat_1_no_unique(c):
     # cat 47 to 47... twice
-    sig47 = utils.get_test_data('47.fa.sig')
-    c.run_sourmash('sig', 'cat', sig47, sig47)
+    sig47 = utils.get_test_data("47.fa.sig")
+    c.run_sourmash("sig", "cat", sig47, sig47)

     # stdout should be same signature
     out = c.last_result.out

@@ -1059,15 +1103,15 @@ def test_sig_cat_1_no_unique(c):
     for n, sig in enumerate(actual_cat_sigs):
         assert sig == test_cat_sig

-    assert n == 1 # two signatures, but enumerate stops at 1.
-    assert 'encountered 1 MinHashes multiple times' in c.last_result.err
+    assert n == 1  # two signatures, but enumerate stops at 1.
+    assert "encountered 1 MinHashes multiple times" in c.last_result.err


 @utils.in_thisdir
 def test_sig_cat_1_unique(c):
     # cat 47 to 47... twice... and get unique
-    sig47 = utils.get_test_data('47.fa.sig')
-    c.run_sourmash('sig', 'cat', sig47, sig47, '--unique')
+    sig47 = utils.get_test_data("47.fa.sig")
+    c.run_sourmash("sig", "cat", sig47, sig47, "--unique")

     # stdout should be same signature
     out = c.last_result.out

@@ -1079,18 +1123,18 @@ def test_sig_cat_1_unique(c):
     for n, sig in enumerate(actual_cat_sigs):
         assert sig == test_cat_sig

-    assert n == 0 # enumerate stops at 0, first sig.
-    assert 'encountered 1 MinHashes multiple times' in err
-    assert '...and removed the duplicates, because --unique was specified.' in err
+    assert n == 0  # enumerate stops at 0, first sig.
+    assert "encountered 1 MinHashes multiple times" in err
+    assert "...and removed the duplicates, because --unique was specified." in err


 @utils.in_thisdir
 def test_sig_cat_2(c):
     # cat several
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig47abund = utils.get_test_data('track_abund/47.fa.sig')
-    multisig = utils.get_test_data('47+63-multisig.sig')
-    c.run_sourmash('sig', 'cat', sig47, sig47abund, multisig)
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig47abund = utils.get_test_data("track_abund/47.fa.sig")
+    multisig = utils.get_test_data("47+63-multisig.sig")
+    c.run_sourmash("sig", "cat", sig47, sig47abund, multisig)

     # stdout should be same signatures
     out = c.last_result.out

@@ -1098,40 +1142,44 @@ def test_sig_cat_2(c):
     siglist = list(load_signatures(out))
     print(len(siglist))

-    assert repr(siglist) == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377)]"""
+    assert (
+        repr(siglist)
+        == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377)]"""
+    )


 @utils.in_tempdir
 def test_sig_cat_2_out(c):
     # cat several
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig47abund = utils.get_test_data('track_abund/47.fa.sig')
-    multisig = utils.get_test_data('47+63-multisig.sig')
-    c.run_sourmash('sig', 'cat', sig47, sig47abund, multisig,
-                   '-o', 'out.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig47abund = utils.get_test_data("track_abund/47.fa.sig")
+    multisig = utils.get_test_data("47+63-multisig.sig")
+    c.run_sourmash("sig", "cat", sig47, sig47abund, multisig, "-o", "out.sig")
     # stdout should be same signatures
-    out = c.output('out.sig')
+    out = c.output("out.sig")

     siglist = list(load_signatures(out))
     print(len(siglist))

-    assert repr(siglist) == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377)]"""
+    assert (
+        repr(siglist)
+        == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377)]"""
+    )


 @utils.in_tempdir
 def test_sig_cat_2_out_inplace(c):
     # cat several; check that we can overwrite one of the input files.
-    sig47 = utils.get_test_data('47.fa.sig')
-    input_sig = c.output('inp.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    input_sig = c.output("inp.sig")
     shutil.copyfile(sig47, input_sig)

-    sig47abund = utils.get_test_data('track_abund/47.fa.sig')
-    multisig = utils.get_test_data('47+63-multisig.sig')
+    sig47abund = utils.get_test_data("track_abund/47.fa.sig")
+    multisig = utils.get_test_data("47+63-multisig.sig")

     # write out to input.
-    c.run_sourmash('sig', 'cat', input_sig, sig47abund, multisig,
-                   '-o', input_sig)
+    c.run_sourmash("sig", "cat", input_sig, sig47abund, multisig, "-o", input_sig)

     # stdout should be same signatures
     out = input_sig

@@ -1139,25 +1187,27 @@ def test_sig_cat_2_out_inplace(c):
     siglist = list(load_signatures(out))
     print(len(siglist))

-    assert repr(siglist) == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377)]"""
+    assert (
+        repr(siglist)
+        == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377)]"""
+    )


 @utils.in_tempdir
 def test_sig_cat_3_filelist(c):
     # cat using a file list as input
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig47abund = utils.get_test_data('track_abund/47.fa.sig')
-    multisig = utils.get_test_data('47+63-multisig.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig47abund = utils.get_test_data("track_abund/47.fa.sig")
+    multisig = utils.get_test_data("47+63-multisig.sig")

     filelist = c.output("filelist")
-    with open(filelist, 'w') as f:
+    with open(filelist, "w") as f:
         f.write("\n".join((sig47, sig47abund, multisig)))

-    c.run_sourmash('sig', 'cat', filelist,
-                   '-o', 'out.sig')
+    c.run_sourmash("sig", "cat", filelist, "-o", "out.sig")

     # stdout should be same signatures
-    out = c.output('out.sig')
+    out = c.output("out.sig")

     # make this a list, not a set, because a set will collapse identical
     # signatures. `sig cat` does not collapse identical signatures, although
@@ -1174,27 +1224,29 @@ def test_sig_cat_3_filelist(c):
     assert len(all_sigs) == len(siglist)

     # sort the signatures by something deterministic and unique
-    siglist.sort(key = lambda x: x.md5sum())
+    siglist.sort(key=lambda x: x.md5sum())

-    assert repr(siglist) == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8)]"""
+    assert (
+        repr(siglist)
+        == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8)]"""
+    )


 @utils.in_tempdir
 def test_sig_cat_4_filelist_with_dbs(c):
     # cat using a file list as input
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig47abund = utils.get_test_data('track_abund/47.fa.sig')
-    sbt = utils.get_test_data('v6.sbt.zip')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig47abund = utils.get_test_data("track_abund/47.fa.sig")
+    sbt = utils.get_test_data("v6.sbt.zip")

     filelist = c.output("filelist")
-    with open(filelist, 'w') as f:
+    with open(filelist, "w") as f:
         f.write("\n".join((sig47, sig47abund, sbt)))

-    c.run_sourmash('sig', 'cat', filelist,
-                   '-o', 'out.sig')
+    c.run_sourmash("sig", "cat", filelist, "-o", "out.sig")

     # stdout should be same signatures
-    out = c.output('out.sig')
+    out = c.output("out.sig")

     siglist = list(load_signatures(out))
     print(len(siglist))

@@ -1211,27 +1263,29 @@ def test_sig_cat_4_filelist_with_dbs(c):
     assert len(all_sigs) == len(siglist)

     # sort the signatures by something deterministic and unique
-    siglist.sort(key = lambda x: x.md5sum())
+    siglist.sort(key=lambda x: x.md5sum())

-    assert repr(siglist) == """[SourmashSignature('', 0107d767), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('', 4e94e602), SourmashSignature('', 60f7e23c), SourmashSignature('', 6d6e87e1), SourmashSignature('', b59473c9), SourmashSignature('', f0c834bc), SourmashSignature('', f71e7817)]"""
+    assert (
+        repr(siglist)
+        == """[SourmashSignature('', 0107d767), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('', 4e94e602), SourmashSignature('', 60f7e23c), SourmashSignature('', 6d6e87e1), SourmashSignature('', b59473c9), SourmashSignature('', f0c834bc), SourmashSignature('', f71e7817)]"""
+    )


 @utils.in_tempdir
 def test_sig_cat_5_from_file(c):
     # cat using a file list as input
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig47abund = utils.get_test_data('track_abund/47.fa.sig')
-    sbt = utils.get_test_data('v6.sbt.zip')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig47abund = utils.get_test_data("track_abund/47.fa.sig")
+    sbt = utils.get_test_data("v6.sbt.zip")

     filelist = c.output("filelist")
-    with open(filelist, 'w') as f:
+    with open(filelist, "w") as f:
         f.write("\n".join((sig47, sig47abund, sbt)))

-    c.run_sourmash('sig', 'cat', '--from-file', filelist,
-                   '-o', 'out.sig')
+    c.run_sourmash("sig", "cat", "--from-file", filelist, "-o", "out.sig")

     # stdout should be same signatures
-    out = c.output('out.sig')
+    out = c.output("out.sig")

     siglist = list(load_signatures(out))
     print(len(siglist))

@@ -1248,30 +1302,40 @@ def test_sig_cat_5_from_file(c):
     assert len(all_sigs) == len(siglist)

     # sort the signatures by something deterministic and unique
-    siglist.sort(key = lambda x: x.md5sum())
+    siglist.sort(key=lambda x: x.md5sum())

-    assert repr(siglist) == """[SourmashSignature('', 0107d767), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('', 4e94e602), SourmashSignature('', 60f7e23c), SourmashSignature('', 6d6e87e1), SourmashSignature('', b59473c9), SourmashSignature('', f0c834bc), SourmashSignature('', f71e7817)]"""
+    assert (
+        repr(siglist)
+        == """[SourmashSignature('', 0107d767), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('', 4e94e602), SourmashSignature('', 60f7e23c), SourmashSignature('', 6d6e87e1), SourmashSignature('', b59473c9), SourmashSignature('', f0c834bc), SourmashSignature('', f71e7817)]"""
+    )


 def test_sig_cat_5_from_file_picklist(runtmp):
     c = runtmp
     # cat using a file list as input
-    sig47 = utils.get_test_data('47.fa.sig')
-    sbt = utils.get_test_data('v6.sbt.zip')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sbt = utils.get_test_data("v6.sbt.zip")

     filelist = c.output("filelist")
-    with open(filelist, 'w') as f:
+    with open(filelist, "w") as f:
         f.write("\n".join((sig47, sbt)))

-    picklist = _write_file(runtmp, 'pl.csv', ['md5short', '09a08691'])
+    picklist = _write_file(runtmp, "pl.csv", ["md5short", "09a08691"])

-    c.run_sourmash('sig', 'cat', '--from-file', filelist,
-                   '--picklist', f'{picklist}:md5short:md5short',
-                   '-o', 'out.sig')
+    c.run_sourmash(
+        "sig",
+        "cat",
+        "--from-file",
+        filelist,
+        "--picklist",
+        f"{picklist}:md5short:md5short",
+        "-o",
+        "out.sig",
+    )

     # stdout should be same signatures
-    out = c.output('out.sig')
+    out = c.output("out.sig")

     siglist = list(load_signatures(out))
     print(len(siglist))

@@ -1286,46 +1350,46 @@ def test_sig_cat_5_from_file_picklist(runtmp):
     assert len(all_sigs) == len(siglist)

     # sort the signatures by something deterministic and unique
-    siglist.sort(key = lambda x: x.md5sum())
+    siglist.sort(key=lambda x: x.md5sum())

-    assert repr(siglist) == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691)]"""
+    assert (
+        repr(siglist)
+        == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691)]"""
"""[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691)]""" + ) def test_sig_cat_6_pattern_include(runtmp): # test --include-db-pattern - sigfiles = glob.glob(utils.get_test_data('prot/*.zip')) + sigfiles = glob.glob(utils.get_test_data("prot/*.zip")) - runtmp.sourmash('sig', 'cat', '--include', 'shewanella', *sigfiles, - '-o', 'out.zip') + runtmp.sourmash("sig", "cat", "--include", "shewanella", *sigfiles, "-o", "out.zip") - idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + idx = sourmash.load_file_as_index(runtmp.output("out.zip")) assert len(idx) == 2 - names = [ ss.name for ss in idx.signatures() ] + names = [ss.name for ss in idx.signatures()] for n in names: - assert 'shewanella' in n.lower(), n + assert "shewanella" in n.lower(), n def test_sig_cat_6_pattern_exclude(runtmp): # test --exclude-db-pattern - sigfiles = glob.glob(utils.get_test_data('prot/*.zip')) + sigfiles = glob.glob(utils.get_test_data("prot/*.zip")) - runtmp.sourmash('sig', 'cat', '--exclude', 'shewanella', *sigfiles, - '-o', 'out.zip') + runtmp.sourmash("sig", "cat", "--exclude", "shewanella", *sigfiles, "-o", "out.zip") - idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + idx = sourmash.load_file_as_index(runtmp.output("out.zip")) assert len(idx) == 18 - names = [ ss.name for ss in idx.signatures() ] + names = [ss.name for ss in idx.signatures()] for n in names: - assert 'shewanella' not in n.lower(), n + assert "shewanella" not in n.lower(), n def test_sig_cat_6_pattern_exclude_no_manifest(runtmp): # test --exclude-db-pattern - db = utils.get_test_data('v6.sbt.zip') + db = utils.get_test_data("v6.sbt.zip") with pytest.raises(SourmashCommandFailed) as e: - runtmp.sourmash('sig', 'cat', '--exclude', 'shewanella', db, - '-o', 'out.zip') + runtmp.sourmash("sig", "cat", "--exclude", "shewanella", db, "-o", "out.zip") assert "require a manifest" in str(e) @@ -1333,10 +1397,10 @@ def test_sig_cat_6_pattern_exclude_no_manifest(runtmp): def test_sig_split_1(runtmp): c = runtmp # split 47 into 1 sig :) - sig47 = utils.get_test_data('47.fa.sig') - c.run_sourmash('sig', 'split', sig47) + sig47 = utils.get_test_data("47.fa.sig") + c.run_sourmash("sig", "split", sig47) - outname = '09a08691.k=31.scaled=1000.DNA.dup=0.47.fa.sig' + outname = "09a08691.k=31.scaled=1000.DNA.dup=0.47.fa.sig" assert os.path.exists(c.output(outname)) @@ -1349,15 +1413,21 @@ def test_sig_split_1(runtmp): def test_sig_split_1_fromfile_picklist(runtmp): c = runtmp # split 47 into 1 sig :) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") - from_file = _write_file(runtmp, 'list.txt', [sig47]) - picklist = _write_file(runtmp, 'pl.csv', ['md5short', '09a08691']) + from_file = _write_file(runtmp, "list.txt", [sig47]) + picklist = _write_file(runtmp, "pl.csv", ["md5short", "09a08691"]) - c.run_sourmash('sig', 'split', '--from-file', from_file, - '--picklist', f'{picklist}:md5short:md5short') + c.run_sourmash( + "sig", + "split", + "--from-file", + from_file, + "--picklist", + f"{picklist}:md5short:md5short", + ) - outname = '09a08691.k=31.scaled=1000.DNA.dup=0.47.fa.sig' + outname = "09a08691.k=31.scaled=1000.DNA.dup=0.47.fa.sig" assert os.path.exists(c.output(outname)) @@ -1370,27 +1440,27 @@ def test_sig_split_1_fromfile_picklist(runtmp): @utils.in_tempdir def test_sig_split_1_overwrite(c): # check message about overwriting - sig47 = utils.get_test_data('47.fa.sig') - c.run_sourmash('sig', 'split', sig47) + sig47 = utils.get_test_data("47.fa.sig") + 
c.run_sourmash("sig", "split", sig47) - outname = '09a08691.k=31.scaled=1000.DNA.dup=0.47.fa.sig' + outname = "09a08691.k=31.scaled=1000.DNA.dup=0.47.fa.sig" assert os.path.exists(c.output(outname)) - c.run_sourmash('sig', 'split', sig47) + c.run_sourmash("sig", "split", sig47) err = c.last_result.err print(err) - assert '** overwriting existing file ' + outname in err + assert "** overwriting existing file " + outname in err @utils.in_tempdir def test_sig_split_2(c): # split 47 twice - sig47 = utils.get_test_data('47.fa.sig') - c.run_sourmash('sig', 'split', sig47, sig47) + sig47 = utils.get_test_data("47.fa.sig") + c.run_sourmash("sig", "split", sig47, sig47) - outname1 = '09a08691.k=31.scaled=1000.DNA.dup=0.47.fa.sig' - outname2 = '09a08691.k=31.scaled=1000.DNA.dup=1.47.fa.sig' + outname1 = "09a08691.k=31.scaled=1000.DNA.dup=0.47.fa.sig" + outname2 = "09a08691.k=31.scaled=1000.DNA.dup=1.47.fa.sig" assert os.path.exists(c.output(outname1)) assert os.path.exists(c.output(outname2)) @@ -1407,12 +1477,12 @@ def test_sig_split_2(c): @utils.in_tempdir def test_sig_split_2_outdir(c): # split 47 twice, put in outdir - sig47 = utils.get_test_data('47.fa.sig') - outdir = c.output('sigout/') - c.run_sourmash('sig', 'split', sig47, sig47, '--outdir', outdir) + sig47 = utils.get_test_data("47.fa.sig") + outdir = c.output("sigout/") + c.run_sourmash("sig", "split", sig47, sig47, "--outdir", outdir) - outname1 = 'sigout/09a08691.k=31.scaled=1000.DNA.dup=0.47.fa.sig' - outname2 = 'sigout/09a08691.k=31.scaled=1000.DNA.dup=1.47.fa.sig' + outname1 = "sigout/09a08691.k=31.scaled=1000.DNA.dup=0.47.fa.sig" + outname2 = "sigout/09a08691.k=31.scaled=1000.DNA.dup=1.47.fa.sig" assert os.path.exists(c.output(outname1)) assert os.path.exists(c.output(outname2)) @@ -1429,12 +1499,12 @@ def test_sig_split_2_outdir(c): @utils.in_tempdir def test_sig_split_2_output_dir(c): # split 47 twice, put in outdir via --output-dir instead of --outdir - sig47 = utils.get_test_data('47.fa.sig') - outdir = c.output('sigout/') - c.run_sourmash('sig', 'split', sig47, sig47, '--output-dir', outdir) + sig47 = utils.get_test_data("47.fa.sig") + outdir = c.output("sigout/") + c.run_sourmash("sig", "split", sig47, sig47, "--output-dir", outdir) - outname1 = 'sigout/09a08691.k=31.scaled=1000.DNA.dup=0.47.fa.sig' - outname2 = 'sigout/09a08691.k=31.scaled=1000.DNA.dup=1.47.fa.sig' + outname1 = "sigout/09a08691.k=31.scaled=1000.DNA.dup=0.47.fa.sig" + outname2 = "sigout/09a08691.k=31.scaled=1000.DNA.dup=1.47.fa.sig" assert os.path.exists(c.output(outname1)) assert os.path.exists(c.output(outname2)) @@ -1451,16 +1521,18 @@ def test_sig_split_2_output_dir(c): @utils.in_tempdir def test_sig_split_3_multisig(c): # split 47 and 47+63-multisig.sig - sig47 = utils.get_test_data('47.fa.sig') - multisig = utils.get_test_data('47+63-multisig.sig') - c.run_sourmash('sig', 'split', sig47, multisig) - - outlist = ['57e2b22f.k=31.scaled=1000.DNA.dup=0.none.sig', - 'bde81a41.k=31.scaled=1000.DNA.dup=0.none.sig', - 'f033bbd8.k=31.scaled=1000.DNA.dup=0.none.sig', - '87a9aec4.k=31.scaled=1000.DNA.dup=0.none.sig', - '837bf2a7.k=31.scaled=1000.DNA.dup=0.none.sig', - '485c3377.k=31.scaled=1000.DNA.dup=0.none.sig'] + sig47 = utils.get_test_data("47.fa.sig") + multisig = utils.get_test_data("47+63-multisig.sig") + c.run_sourmash("sig", "split", sig47, multisig) + + outlist = [ + "57e2b22f.k=31.scaled=1000.DNA.dup=0.none.sig", + "bde81a41.k=31.scaled=1000.DNA.dup=0.none.sig", + "f033bbd8.k=31.scaled=1000.DNA.dup=0.none.sig", + 
"87a9aec4.k=31.scaled=1000.DNA.dup=0.none.sig", + "837bf2a7.k=31.scaled=1000.DNA.dup=0.none.sig", + "485c3377.k=31.scaled=1000.DNA.dup=0.none.sig", + ] for filename in outlist: assert os.path.exists(c.output(filename)) @@ -1469,16 +1541,18 @@ def test_sig_split_3_multisig_sig_gz(runtmp): # split 47 and 47+63-multisig.sig with a .sig.gz extension c = runtmp - sig47 = utils.get_test_data('47.fa.sig') - multisig = utils.get_test_data('47+63-multisig.sig') - c.run_sourmash('sig', 'split', sig47, multisig, '-E', '.sig.gz') - - outlist = ['57e2b22f.k=31.scaled=1000.DNA.dup=0.none.sig.gz', - 'bde81a41.k=31.scaled=1000.DNA.dup=0.none.sig.gz', - 'f033bbd8.k=31.scaled=1000.DNA.dup=0.none.sig.gz', - '87a9aec4.k=31.scaled=1000.DNA.dup=0.none.sig.gz', - '837bf2a7.k=31.scaled=1000.DNA.dup=0.none.sig.gz', - '485c3377.k=31.scaled=1000.DNA.dup=0.none.sig.gz'] + sig47 = utils.get_test_data("47.fa.sig") + multisig = utils.get_test_data("47+63-multisig.sig") + c.run_sourmash("sig", "split", sig47, multisig, "-E", ".sig.gz") + + outlist = [ + "57e2b22f.k=31.scaled=1000.DNA.dup=0.none.sig.gz", + "bde81a41.k=31.scaled=1000.DNA.dup=0.none.sig.gz", + "f033bbd8.k=31.scaled=1000.DNA.dup=0.none.sig.gz", + "87a9aec4.k=31.scaled=1000.DNA.dup=0.none.sig.gz", + "837bf2a7.k=31.scaled=1000.DNA.dup=0.none.sig.gz", + "485c3377.k=31.scaled=1000.DNA.dup=0.none.sig.gz", + ] for filename in outlist: assert os.path.exists(c.output(filename)) @@ -1487,16 +1561,18 @@ def test_sig_split_3_multisig_zip(runtmp): # split 47 and 47+63-multisig.sig with a .zip extension c = runtmp - sig47 = utils.get_test_data('47.fa.sig') - multisig = utils.get_test_data('47+63-multisig.sig') - c.run_sourmash('sig', 'split', sig47, multisig, '-E', '.zip') - - outlist = ['57e2b22f.k=31.scaled=1000.DNA.dup=0.none.zip', - 'bde81a41.k=31.scaled=1000.DNA.dup=0.none.zip', - 'f033bbd8.k=31.scaled=1000.DNA.dup=0.none.zip', - '87a9aec4.k=31.scaled=1000.DNA.dup=0.none.zip', - '837bf2a7.k=31.scaled=1000.DNA.dup=0.none.zip', - '485c3377.k=31.scaled=1000.DNA.dup=0.none.zip'] + sig47 = utils.get_test_data("47.fa.sig") + multisig = utils.get_test_data("47+63-multisig.sig") + c.run_sourmash("sig", "split", sig47, multisig, "-E", ".zip") + + outlist = [ + "57e2b22f.k=31.scaled=1000.DNA.dup=0.none.zip", + "bde81a41.k=31.scaled=1000.DNA.dup=0.none.zip", + "f033bbd8.k=31.scaled=1000.DNA.dup=0.none.zip", + "87a9aec4.k=31.scaled=1000.DNA.dup=0.none.zip", + "837bf2a7.k=31.scaled=1000.DNA.dup=0.none.zip", + "485c3377.k=31.scaled=1000.DNA.dup=0.none.zip", + ] for filename in outlist: assert os.path.exists(c.output(filename)) @@ -1504,17 +1580,19 @@ def test_sig_split_3_multisig_zip(runtmp): @utils.in_tempdir def test_sig_split_4_sbt_prot(c): # split sbt - sbt1 = utils.get_test_data('prot/protein.sbt.zip') - sbt2 = utils.get_test_data('prot/dayhoff.sbt.zip') - sbt3 = utils.get_test_data('prot/hp.sbt.zip') - c.run_sourmash('sig', 'split', sbt1, sbt2, sbt3) - - outlist = ['16869d2c.k=19.scaled=100.protein.dup=0.GCA_001593925.1_ASM159392v1_protein.faa.gz.sig', - '120d311c.k=19.scaled=100.protein.dup=0.GCA_001593935.1_ASM159393v1_protein.faa.gz.sig', - 'fbca5e52.k=19.scaled=100.dayhoff.dup=0.GCA_001593925.1_ASM159392v1_protein.faa.gz.sig', - '1cbd888b.k=19.scaled=100.dayhoff.dup=0.GCA_001593935.1_ASM159393v1_protein.faa.gz.sig', - 'ea2a1ad2.k=19.scaled=100.hp.dup=0.GCA_001593925.1_ASM159392v1_protein.faa.gz.sig', - 'bb0e6d90.k=19.scaled=100.hp.dup=0.GCA_001593935.1_ASM159393v1_protein.faa.gz.sig'] + sbt1 = utils.get_test_data("prot/protein.sbt.zip") + sbt2 = 
utils.get_test_data("prot/dayhoff.sbt.zip") + sbt3 = utils.get_test_data("prot/hp.sbt.zip") + c.run_sourmash("sig", "split", sbt1, sbt2, sbt3) + + outlist = [ + "16869d2c.k=19.scaled=100.protein.dup=0.GCA_001593925.1_ASM159392v1_protein.faa.gz.sig", + "120d311c.k=19.scaled=100.protein.dup=0.GCA_001593935.1_ASM159393v1_protein.faa.gz.sig", + "fbca5e52.k=19.scaled=100.dayhoff.dup=0.GCA_001593925.1_ASM159392v1_protein.faa.gz.sig", + "1cbd888b.k=19.scaled=100.dayhoff.dup=0.GCA_001593935.1_ASM159393v1_protein.faa.gz.sig", + "ea2a1ad2.k=19.scaled=100.hp.dup=0.GCA_001593925.1_ASM159392v1_protein.faa.gz.sig", + "bb0e6d90.k=19.scaled=100.hp.dup=0.GCA_001593935.1_ASM159393v1_protein.faa.gz.sig", + ] for filename in outlist: assert os.path.exists(c.output(filename)) @@ -1522,20 +1600,22 @@ def test_sig_split_4_sbt_prot(c): @utils.in_tempdir def test_sig_split_4_lca_prot(c): # split lca - lca1 = utils.get_test_data('prot/protein.lca.json.gz') - lca2 = utils.get_test_data('prot/dayhoff.lca.json.gz') - lca3 = utils.get_test_data('prot/hp.lca.json.gz') - c.run_sourmash('sig', 'split', lca1, lca2, lca3) + lca1 = utils.get_test_data("prot/protein.lca.json.gz") + lca2 = utils.get_test_data("prot/dayhoff.lca.json.gz") + lca3 = utils.get_test_data("prot/hp.lca.json.gz") + c.run_sourmash("sig", "split", lca1, lca2, lca3) print(c.last_result.out) print(c.last_result.err) - outlist = ['16869d2c.k=19.scaled=100.protein.dup=0.none.sig', - '120d311c.k=19.scaled=100.protein.dup=0.none.sig', - 'fbca5e52.k=19.scaled=100.dayhoff.dup=0.none.sig', - '1cbd888b.k=19.scaled=100.dayhoff.dup=0.none.sig', - 'ea2a1ad2.k=19.scaled=100.hp.dup=0.none.sig', - 'bb0e6d90.k=19.scaled=100.hp.dup=0.none.sig'] + outlist = [ + "16869d2c.k=19.scaled=100.protein.dup=0.none.sig", + "120d311c.k=19.scaled=100.protein.dup=0.none.sig", + "fbca5e52.k=19.scaled=100.dayhoff.dup=0.none.sig", + "1cbd888b.k=19.scaled=100.dayhoff.dup=0.none.sig", + "ea2a1ad2.k=19.scaled=100.hp.dup=0.none.sig", + "bb0e6d90.k=19.scaled=100.hp.dup=0.none.sig", + ] for filename in outlist: assert os.path.exists(c.output(filename)) @@ -1543,23 +1623,25 @@ def test_sig_split_4_lca_prot(c): @utils.in_tempdir def test_sig_split_5_no_exist(c): # no such file - with pytest.raises(SourmashCommandFailed) as e: - c.run_sourmash('sig', 'split', 'foo') + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("sig", "split", "foo") def test_sig_split_6_numsigs(runtmp): c = runtmp - sigs11 = utils.get_test_data('genome-s11.fa.gz.sig') - c.run_sourmash('sig', 'split', sigs11) + sigs11 = utils.get_test_data("genome-s11.fa.gz.sig") + c.run_sourmash("sig", "split", sigs11) print(c.last_result.out) print(c.last_result.err) - outlist = ['1437d8ea.k=21.num=500.DNA.dup=0.genome-s11.fa.gz.sig', - '37aea787.k=7.num=500.protein.dup=0.genome-s11.fa.gz.sig', - '68c565be.k=30.num=500.DNA.dup=0.genome-s11.fa.gz.sig', - '73b6df1c.k=10.num=500.protein.dup=0.genome-s11.fa.gz.sig'] + outlist = [ + "1437d8ea.k=21.num=500.DNA.dup=0.genome-s11.fa.gz.sig", + "37aea787.k=7.num=500.protein.dup=0.genome-s11.fa.gz.sig", + "68c565be.k=30.num=500.DNA.dup=0.genome-s11.fa.gz.sig", + "73b6df1c.k=10.num=500.protein.dup=0.genome-s11.fa.gz.sig", + ] for filename in outlist: assert os.path.exists(c.output(filename)) @@ -1569,8 +1651,8 @@ def test_sig_extract_1(runtmp): c = runtmp # extract 47 from 47... 
-    sig47 = utils.get_test_data('47.fa.sig')
-    c.run_sourmash('sig', 'extract', sig47)
+    sig47 = utils.get_test_data("47.fa.sig")
+    c.run_sourmash("sig", "extract", sig47)

     # stdout should be new signature
     out = c.last_result.out

@@ -1586,9 +1668,9 @@ def test_sig_extract_1_from_file(runtmp):
     c = runtmp
     # extract 47 from 47... :)
-    sig47 = utils.get_test_data('47.fa.sig')
-    from_file = _write_file(runtmp, 'list.txt', [sig47])
-    c.run_sourmash('sig', 'extract', '--from-file', from_file)
+    sig47 = utils.get_test_data("47.fa.sig")
+    from_file = _write_file(runtmp, "list.txt", [sig47])
+    c.run_sourmash("sig", "extract", "--from-file", from_file)

     # stdout should be new signature
     out = c.last_result.out

@@ -1602,9 +1684,9 @@ def test_sig_extract_1_from_file(runtmp):
 @utils.in_tempdir
 def test_sig_extract_2(c):
     # extract matches to 47's md5sum from among several
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
-    c.run_sourmash('sig', 'extract', sig47, sig63, '--md5', '09a0869')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")
+    c.run_sourmash("sig", "extract", sig47, sig63, "--md5", "09a0869")

     # stdout should be new signature
     out = c.last_result.out

@@ -1621,10 +1703,10 @@ def test_sig_extract_2(c):
 @utils.in_tempdir
 def test_sig_extract_2_zipfile(c):
     # extract matches to 47's md5sum from among several in a zipfile
-    all_zip = utils.get_test_data('prot/all.zip')
-    sig47 = utils.get_test_data('47.fa.sig')
+    all_zip = utils.get_test_data("prot/all.zip")
+    sig47 = utils.get_test_data("47.fa.sig")

-    c.run_sourmash('sig', 'extract', all_zip, '--md5', '09a0869')
+    c.run_sourmash("sig", "extract", all_zip, "--md5", "09a0869")

     # stdout should be new signature
     out = c.last_result.out

@@ -1641,17 +1723,17 @@ def test_sig_extract_2_zipfile(c):
 @utils.in_tempdir
 def test_sig_extract_3(c):
     # extract nothing (no md5 match)
-    sig47 = utils.get_test_data('47.fa.sig')
-    with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('sig', 'extract', sig47, '--md5', 'FOO')
+    sig47 = utils.get_test_data("47.fa.sig")
+    with pytest.raises(SourmashCommandFailed):
+        c.run_sourmash("sig", "extract", sig47, "--md5", "FOO")


 @utils.in_tempdir
 def test_sig_extract_4(c):
     # extract matches to 47's name from among several signatures
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
-    c.run_sourmash('sig', 'extract', sig47, sig63, '--name', 'NC_009665.1')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")
+    c.run_sourmash("sig", "extract", sig47, sig63, "--name", "NC_009665.1")

     # stdout should be new signature
     out = c.last_result.out

@@ -1668,17 +1750,17 @@ def test_sig_extract_4(c):
 @utils.in_tempdir
 def test_sig_extract_5(c):
     # extract nothing (no name match)
-    sig47 = utils.get_test_data('47.fa.sig')
-    with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('sig', 'extract', sig47, '--name', 'FOO')
+    sig47 = utils.get_test_data("47.fa.sig")
+    with pytest.raises(SourmashCommandFailed):
+        c.run_sourmash("sig", "extract", sig47, "--name", "FOO")


 @utils.in_tempdir
 def test_sig_extract_6(c):
     # extract matches to several names from among several signatures
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
-    c.run_sourmash('sig', 'extract', sig47, sig63, '--name', 'Shewanella')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")
+    c.run_sourmash("sig", "extract", sig47, sig63, "--name", "Shewanella")

     # stdout should be new signature
     out = c.last_result.out

@@ -1692,8 +1774,8 @@ def test_sig_extract_6(c):
 @utils.in_tempdir
 def test_sig_extract_7(c):
     # extract matches based on ksize
-    sig2 = utils.get_test_data('2.fa.sig')
-    c.run_sourmash('sig', 'extract', sig2, '-k', '31')
+    sig2 = utils.get_test_data("2.fa.sig")
+    c.run_sourmash("sig", "extract", sig2, "-k", "31")

     # stdout should be new signature
     out = c.last_result.out

@@ -1707,8 +1789,8 @@ def test_sig_extract_7(c):
 @utils.in_tempdir
 def test_sig_extract_7_no_ksize(c):
     # extract all three matches when -k not specified
-    sig2 = utils.get_test_data('2.fa.sig')
-    c.run_sourmash('sig', 'extract', sig2)
+    sig2 = utils.get_test_data("2.fa.sig")
+    c.run_sourmash("sig", "extract", sig2)

     # stdout should be new signature
     out = c.last_result.out

@@ -1721,18 +1803,18 @@ def test_sig_extract_7_no_ksize(c):

 def test_sig_extract_8_empty_picklist_fail(runtmp):
     # what happens with an empty picklist?
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     # make empty picklist
-    picklist_csv = runtmp.output('pick.csv')
-    with open(picklist_csv, 'w', newline='') as csvfp:
+    picklist_csv = runtmp.output("pick.csv")
+    with open(picklist_csv, "w", newline=""):
         pass

     picklist_arg = f"{picklist_csv}:md5full:md5"

     with pytest.raises(SourmashCommandFailed):
-        runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg)
+        runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg)

     err = runtmp.last_result.err
     print(err)

@@ -1742,15 +1824,15 @@ def test_sig_extract_8_empty_picklist_fail(runtmp):

 def test_sig_extract_8_nofile_picklist_fail(runtmp):
     # what happens when picklist file does not exist?
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     # picklist file does not exist
-    picklist_csv = runtmp.output('pick.csv')
+    picklist_csv = runtmp.output("pick.csv")

     picklist_arg = f"{picklist_csv}:md5full:md5"

     with pytest.raises(SourmashCommandFailed):
-        runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg)
+        runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg)

     err = runtmp.last_result.err
     print(err)

@@ -1760,25 +1842,27 @@ def test_sig_extract_8_nofile_picklist_fail(runtmp):

 def test_sig_extract_8_picklist_md5(runtmp):
     # extract 47 from 47, using a picklist w/full md5
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     # select on any of these attributes
-    row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome',
-               md5full='09a08691ce52952152f0e866a59f6261',
-               md5short='09a08691ce5295215',
-               fullIdent='NC_009665.1',
-               nodotIdent='NC_009665')
+    row = dict(
+        exactName="NC_009665.1 Shewanella baltica OS185, complete genome",
+        md5full="09a08691ce52952152f0e866a59f6261",
+        md5short="09a08691ce5295215",
+        fullIdent="NC_009665.1",
+        nodotIdent="NC_009665",
+    )

     # make picklist
-    picklist_csv = runtmp.output('pick.csv')
-    with open(picklist_csv, 'w', newline='') as csvfp:
+    picklist_csv = runtmp.output("pick.csv")
+    with open(picklist_csv, "w", newline="") as csvfp:
         w = csv.DictWriter(csvfp, fieldnames=row.keys())
         w.writeheader()
         w.writerow(row)

     picklist_arg = f"{picklist_csv}:md5full:md5"

-    runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg)
+    runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg)

     # stdout should be new signature
     out = runtmp.last_result.out

@@ -1799,26 +1883,28 @@ def test_sig_extract_8_picklist_md5(runtmp):

 def test_sig_extract_8_picklist_md5_zipfile(runtmp):
     # extract 47 from a zipfile, using a picklist w/full md5
-    allzip = utils.get_test_data('prot/all.zip')
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    allzip = utils.get_test_data("prot/all.zip")
+    sig47 = utils.get_test_data("47.fa.sig")
+    utils.get_test_data("63.fa.sig")

     # select on any of these attributes
-    row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome',
-               md5full='09a08691ce52952152f0e866a59f6261',
-               md5short='09a08691ce5295215',
-               fullIdent='NC_009665.1',
-               nodotIdent='NC_009665')
+    row = dict(
+        exactName="NC_009665.1 Shewanella baltica OS185, complete genome",
+        md5full="09a08691ce52952152f0e866a59f6261",
+        md5short="09a08691ce5295215",
+        fullIdent="NC_009665.1",
+        nodotIdent="NC_009665",
+    )

     # make picklist
-    picklist_csv = runtmp.output('pick.csv')
-    with open(picklist_csv, 'w', newline='') as csvfp:
+    picklist_csv = runtmp.output("pick.csv")
+    with open(picklist_csv, "w", newline="") as csvfp:
         w = csv.DictWriter(csvfp, fieldnames=row.keys())
         w.writeheader()
         w.writerow(row)

     picklist_arg = f"{picklist_csv}:md5full:md5"

-    runtmp.sourmash('sig', 'extract', allzip, '--picklist', picklist_arg)
+    runtmp.sourmash("sig", "extract", allzip, "--picklist", picklist_arg)

     # stdout should be new signature
     out = runtmp.last_result.out

@@ -1840,54 +1926,68 @@ def test_sig_extract_8_picklist_md5_zipfile(runtmp):

 def test_sig_extract_8_picklist_md5_lca_fail(runtmp):
     # try to extract 47 from an LCA database, using a picklist w/full md5; will
     # fail.
-    allzip = utils.get_test_data('lca/47+63.lca.json')
+    allzip = utils.get_test_data("lca/47+63.lca.json")

     # select on any of these attributes
-    row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome',
-               md5full='50a9274021e43eda8b2e77f8fa60ae8e',
-               md5short='50a9274021e43eda8b2e77f8fa60ae8e'[:8],
-               fullIdent='NC_009665.1',
-               nodotIdent='NC_009665')
+    row = dict(
+        exactName="NC_009665.1 Shewanella baltica OS185, complete genome",
+        md5full="50a9274021e43eda8b2e77f8fa60ae8e",
+        md5short="50a9274021e43eda8b2e77f8fa60ae8e"[:8],
+        fullIdent="NC_009665.1",
+        nodotIdent="NC_009665",
+    )

     # make picklist
-    picklist_csv = runtmp.output('pick.csv')
-    with open(picklist_csv, 'w', newline='') as csvfp:
+    picklist_csv = runtmp.output("pick.csv")
+    with open(picklist_csv, "w", newline="") as csvfp:
         w = csv.DictWriter(csvfp, fieldnames=row.keys())
         w.writeheader()
         w.writerow(row)

     picklist_arg = f"{picklist_csv}:md5full:md5"

-    with pytest.raises(SourmashCommandFailed) as exc:
-        runtmp.sourmash('sig', 'extract', allzip, '--picklist', picklist_arg,
-                        '--md5', '50a9274021e4')
+    with pytest.raises(SourmashCommandFailed):
+        runtmp.sourmash(
+            "sig",
+            "extract",
+            allzip,
+            "--picklist",
+            picklist_arg,
+            "--md5",
+            "50a9274021e4",
+        )

     # this happens b/c the implementation of 'extract' uses picklists, and
     # LCA databases don't support multiple picklists.
     print(runtmp.last_result.err)
-    assert "This input collection doesn't support 'extract' with picklists or patterns." in runtmp.last_result.err
+    assert (
+        "This input collection doesn't support 'extract' with picklists or patterns."
+        in runtmp.last_result.err
+    )


 def test_sig_extract_8_picklist_md5_include(runtmp):
     # extract 47 from 47, using a picklist w/full md5: explicit include
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     # select on any of these attributes
-    row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome',
-               md5full='09a08691ce52952152f0e866a59f6261',
-               md5short='09a08691ce5295215',
-               fullIdent='NC_009665.1',
-               nodotIdent='NC_009665')
+    row = dict(
+        exactName="NC_009665.1 Shewanella baltica OS185, complete genome",
+        md5full="09a08691ce52952152f0e866a59f6261",
+        md5short="09a08691ce5295215",
+        fullIdent="NC_009665.1",
+        nodotIdent="NC_009665",
+    )

     # make picklist
-    picklist_csv = runtmp.output('pick.csv')
-    with open(picklist_csv, 'w', newline='') as csvfp:
+    picklist_csv = runtmp.output("pick.csv")
+    with open(picklist_csv, "w", newline="") as csvfp:
         w = csv.DictWriter(csvfp, fieldnames=row.keys())
         w.writeheader()
         w.writerow(row)

     picklist_arg = f"{picklist_csv}:md5full:md5:include"

-    runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg)
+    runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg)

     # stdout should be new signature
     out = runtmp.last_result.out

@@ -1908,25 +2008,27 @@ def test_sig_extract_8_picklist_md5_include(runtmp):

 def test_sig_extract_8_picklist_md5_exclude(runtmp):
     # extract 63 from 47,63 by excluding 47, using a picklist w/full md5
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     # select on any of these attributes
-    row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome',
-               md5full='09a08691ce52952152f0e866a59f6261',
-               md5short='09a08691ce5295215',
-               fullIdent='NC_009665.1',
-               nodotIdent='NC_009665')
+    row = dict(
+        exactName="NC_009665.1 Shewanella baltica OS185, complete genome",
+        md5full="09a08691ce52952152f0e866a59f6261",
+        md5short="09a08691ce5295215",
+        fullIdent="NC_009665.1",
+        nodotIdent="NC_009665",
+    )

     # make picklist
-    picklist_csv = runtmp.output('pick.csv')
-    with open(picklist_csv, 'w', newline='') as csvfp:
+    picklist_csv = runtmp.output("pick.csv")
+    with open(picklist_csv, "w", newline="") as csvfp:
         w = csv.DictWriter(csvfp, fieldnames=row.keys())
         w.writeheader()
         w.writerow(row)

     picklist_arg = f"{picklist_csv}:md5full:md5:exclude"

-    runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg)
+    runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg)

     # stdout should be new signature
     out = runtmp.last_result.out

@@ -1948,30 +2050,45 @@ def test_sig_extract_8_picklist_md5_exclude(runtmp):

 def test_sig_extract_8_picklist_md5_require_all(runtmp):
     # extract 47 from 47, using a picklist w/full md5;
     # confirm that check missing picklist val errors out on --picklist-require
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     # select on any of these attributes
-    row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome',
-               md5full='09a08691ce52952152f0e866a59f6261',
-               md5short='09a08691ce5295215',
-               fullIdent='NC_009665.1',
-               nodotIdent='NC_009665')
+    row = dict(
+        exactName="NC_009665.1 Shewanella baltica OS185, complete genome",
+        md5full="09a08691ce52952152f0e866a59f6261",
+        md5short="09a08691ce5295215",
+        fullIdent="NC_009665.1",
+        nodotIdent="NC_009665",
+    )

     # make picklist
-    picklist_csv = runtmp.output('pick.csv')
-    with open(picklist_csv, 'w', newline='') as csvfp:
+    picklist_csv = runtmp.output("pick.csv")
+    with open(picklist_csv, "w", newline="") as csvfp:
         w = csv.DictWriter(csvfp, fieldnames=row.keys())
         w.writeheader()
         w.writerow(row)
-        w.writerow(dict(exactName='', md5full='BAD MD5',
-                        md5short='', fullIdent='', nodotIdent=''))
+        w.writerow(
+            dict(
+                exactName="",
+                md5full="BAD MD5",
+                md5short="",
+                fullIdent="",
+                nodotIdent="",
+            )
+        )

     picklist_arg = f"{picklist_csv}:md5full:md5"

     with pytest.raises(SourmashCommandFailed):
-        runtmp.sourmash('sig', 'extract', sig47, sig63,
-                        '--picklist', picklist_arg,
-                        '--picklist-require-all')
+        runtmp.sourmash(
+            "sig",
+            "extract",
+            sig47,
+            sig63,
+            "--picklist",
+            picklist_arg,
+            "--picklist-require-all",
+        )

     # stdout should be new signature
     out = runtmp.last_result.out

@@ -1988,31 +2105,33 @@ def test_sig_extract_8_picklist_md5_require_all(runtmp):
     assert "loaded 1 total that matched ksize & molecule type" in err
     assert "extracted 1 signatures from 2 file(s)" in err
     assert "for given picklist, found 1 matches to 2 distinct values" in err
-    assert 'WARNING: 1 missing picklist values.' in err
-    assert 'ERROR: failing because --picklist-require-all was set' in err
+    assert "WARNING: 1 missing picklist values." in err
+    assert "ERROR: failing because --picklist-require-all was set" in err


 def test_sig_extract_8_picklist_name(runtmp):
     # extract 47 from 47, using a picklist w/exact name
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     # select on any of these attributes
-    row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome',
-               md5full='09a08691ce52952152f0e866a59f6261',
-               md5short='09a08691ce5295215',
-               fullIdent='NC_009665.1',
-               nodotIdent='NC_009665')
+    row = dict(
+        exactName="NC_009665.1 Shewanella baltica OS185, complete genome",
+        md5full="09a08691ce52952152f0e866a59f6261",
+        md5short="09a08691ce5295215",
+        fullIdent="NC_009665.1",
+        nodotIdent="NC_009665",
+    )

     # make picklist
-    picklist_csv = runtmp.output('pick.csv')
-    with open(picklist_csv, 'w', newline='') as csvfp:
+    picklist_csv = runtmp.output("pick.csv")
+    with open(picklist_csv, "w", newline="") as csvfp:
         w = csv.DictWriter(csvfp, fieldnames=row.keys())
         w.writeheader()
         w.writerow(row)

     picklist_arg = f"{picklist_csv}:exactName:name"

-    runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg)
+    runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg)

     # stdout should be new signature
     out = runtmp.last_result.out

@@ -2025,25 +2144,27 @@ def test_sig_extract_8_picklist_name(runtmp):

 def test_sig_extract_8_picklist_name_exclude(runtmp):
     # exclude 47 based on name
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     # select on any of these attributes
-    row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome',
-               md5full='09a08691ce52952152f0e866a59f6261',
-               md5short='09a08691ce5295215',
-               fullIdent='NC_009665.1',
-               nodotIdent='NC_009665')
+    row = dict(
+        exactName="NC_009665.1 Shewanella baltica OS185, complete genome",
+        md5full="09a08691ce52952152f0e866a59f6261",
+        md5short="09a08691ce5295215",
+        fullIdent="NC_009665.1",
+        nodotIdent="NC_009665",
+    )

     # make picklist
-    picklist_csv = runtmp.output('pick.csv')
-    with open(picklist_csv, 'w', newline='') as csvfp:
+    picklist_csv = runtmp.output("pick.csv")
+    with open(picklist_csv, "w", newline="") as csvfp:
         w = csv.DictWriter(csvfp, fieldnames=row.keys())
         w.writeheader()
         w.writerow(row)

     picklist_arg = f"{picklist_csv}:exactName:name:exclude"

-    runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg)
+    runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg)

     # stdout should be new signature
     out = runtmp.last_result.out

@@ -2056,25 +2177,27 @@ def test_sig_extract_8_picklist_name_exclude(runtmp):

 def test_sig_extract_8_picklist_ident(runtmp):
     # extract 47 from 47, using a picklist w/full ident
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     # select on any of these attributes
-    row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome',
-               md5full='09a08691ce52952152f0e866a59f6261',
-               md5short='09a08691ce5295215',
-               fullIdent='NC_009665.1',
-               nodotIdent='NC_009665')
+    row = dict(
+        exactName="NC_009665.1 Shewanella baltica OS185, complete genome",
+        md5full="09a08691ce52952152f0e866a59f6261",
+        md5short="09a08691ce5295215",
+        fullIdent="NC_009665.1",
+        nodotIdent="NC_009665",
+    )

     # make picklist
-    picklist_csv = runtmp.output('pick.csv')
-    with open(picklist_csv, 'w', newline='') as csvfp:
+    picklist_csv = runtmp.output("pick.csv")
+    with open(picklist_csv, "w", newline="") as csvfp:
         w = csv.DictWriter(csvfp, fieldnames=row.keys())
         w.writeheader()
         w.writerow(row)

     picklist_arg = f"{picklist_csv}:fullIdent:ident"

-    runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg)
+    runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg)

     # stdout should be new signature
     out = runtmp.last_result.out

@@ -2087,25 +2210,27 @@ def test_sig_extract_8_picklist_ident(runtmp):

 def test_sig_extract_8_picklist_ident_exclude(runtmp):
     # exclude 47 based on ident
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     # select on any of these attributes
-    row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome',
-               md5full='09a08691ce52952152f0e866a59f6261',
-               md5short='09a08691ce5295215',
-               fullIdent='NC_009665.1',
-               nodotIdent='NC_009665')
+    row = dict(
+        exactName="NC_009665.1 Shewanella baltica OS185, complete genome",
+        md5full="09a08691ce52952152f0e866a59f6261",
+        md5short="09a08691ce5295215",
+        fullIdent="NC_009665.1",
+        nodotIdent="NC_009665",
+    )

     # make picklist
-    picklist_csv = runtmp.output('pick.csv')
-    with open(picklist_csv, 'w', newline='') as csvfp:
+    picklist_csv = runtmp.output("pick.csv")
+    with open(picklist_csv, "w", newline="") as csvfp:
         w = csv.DictWriter(csvfp, fieldnames=row.keys())
         w.writeheader()
         w.writerow(row)

     picklist_arg = f"{picklist_csv}:fullIdent:ident:exclude"

-    runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg)
+    runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg)

     # stdout should be new signature
     out = runtmp.last_result.out

@@ -2118,25 +2243,27 @@ def test_sig_extract_8_picklist_ident_exclude(runtmp):

 def test_sig_extract_8_picklist_ident_dot(runtmp):
     # extract 47 from 47, using a picklist w/ident prefix
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     # select on any of these attributes
-    row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome',
-               md5full='09a08691ce52952152f0e866a59f6261',
-               md5short='09a08691ce5295215',
-               fullIdent='NC_009665.1',
-               nodotIdent='NC_009665')
+    row = dict(
+        exactName="NC_009665.1 Shewanella baltica OS185, complete genome",
+        md5full="09a08691ce52952152f0e866a59f6261",
+        md5short="09a08691ce5295215",
+        fullIdent="NC_009665.1",
+        nodotIdent="NC_009665",
+    )

     # make picklist
-    picklist_csv = runtmp.output('pick.csv')
-    with open(picklist_csv, 'w', newline='') as csvfp:
+    picklist_csv = runtmp.output("pick.csv")
+    with open(picklist_csv, "w", newline="") as csvfp:
         w = csv.DictWriter(csvfp, fieldnames=row.keys())
         w.writeheader()
         w.writerow(row)

     picklist_arg = f"{picklist_csv}:nodotIdent:identprefix"

-    runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg)
+    runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg)

     # stdout should be new signature
     out = runtmp.last_result.out

@@ -2149,25 +2276,27 @@ def test_sig_extract_8_picklist_ident_dot(runtmp):

 def test_sig_extract_8_picklist_ident_dot_exclude(runtmp):
     # exclude 47 based on identprefix
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") # select on any of these attributes - row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome', - md5full='09a08691ce52952152f0e866a59f6261', - md5short='09a08691ce5295215', - fullIdent='NC_009665.1', - nodotIdent='NC_009665') + row = dict( + exactName="NC_009665.1 Shewanella baltica OS185, complete genome", + md5full="09a08691ce52952152f0e866a59f6261", + md5short="09a08691ce5295215", + fullIdent="NC_009665.1", + nodotIdent="NC_009665", + ) # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: w = csv.DictWriter(csvfp, fieldnames=row.keys()) w.writeheader() w.writerow(row) picklist_arg = f"{picklist_csv}:nodotIdent:identprefix:exclude" - runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg) + runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg) # stdout should be new signature out = runtmp.last_result.out @@ -2180,25 +2309,27 @@ def test_sig_extract_8_picklist_ident_dot_exclude(runtmp): def test_sig_extract_8_picklist_md5_short(runtmp): # extract 47 from 47, using a picklist w/full md5 - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") # select on any of these attributes - row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome', - md5full='09a08691ce52952152f0e866a59f6261', - md5short='09a08691ce5295215', - fullIdent='NC_009665.1', - nodotIdent='NC_009665') + row = dict( + exactName="NC_009665.1 Shewanella baltica OS185, complete genome", + md5full="09a08691ce52952152f0e866a59f6261", + md5short="09a08691ce5295215", + fullIdent="NC_009665.1", + nodotIdent="NC_009665", + ) # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: w = csv.DictWriter(csvfp, fieldnames=row.keys()) w.writeheader() w.writerow(row) picklist_arg = f"{picklist_csv}:md5short:md5prefix8" - runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg) + runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg) # stdout should be new signature out = runtmp.last_result.out @@ -2211,25 +2342,27 @@ def test_sig_extract_8_picklist_md5_short(runtmp): def test_sig_extract_8_picklist_md5_short_exclude(runtmp): # exclude 47 based on md5prefix8 - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") # select on any of these attributes - row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome', - md5full='09a08691ce52952152f0e866a59f6261', - md5short='09a08691ce5295215', - fullIdent='NC_009665.1', - nodotIdent='NC_009665') + row = dict( + exactName="NC_009665.1 Shewanella baltica OS185, complete genome", + md5full="09a08691ce52952152f0e866a59f6261", + md5short="09a08691ce5295215", + fullIdent="NC_009665.1", + nodotIdent="NC_009665", + ) # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: w = csv.DictWriter(csvfp, fieldnames=row.keys()) 
w.writeheader() w.writerow(row) picklist_arg = f"{picklist_csv}:md5short:md5prefix8:exclude" - runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg) + runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg) # stdout should be new signature out = runtmp.last_result.out @@ -2242,25 +2375,27 @@ def test_sig_extract_8_picklist_md5_short_exclude(runtmp): def test_sig_extract_8_picklist_md5_short_alias(runtmp): # extract 47 from 47, using a picklist w/full md5 - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") # select on any of these attributes - row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome', - md5full='09a08691ce52952152f0e866a59f6261', - md5short='09a08691ce5295215', - fullIdent='NC_009665.1', - nodotIdent='NC_009665') + row = dict( + exactName="NC_009665.1 Shewanella baltica OS185, complete genome", + md5full="09a08691ce52952152f0e866a59f6261", + md5short="09a08691ce5295215", + fullIdent="NC_009665.1", + nodotIdent="NC_009665", + ) # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: w = csv.DictWriter(csvfp, fieldnames=row.keys()) w.writeheader() w.writerow(row) picklist_arg = f"{picklist_csv}:md5short:md5short" - runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg) + runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg) # stdout should be new signature out = runtmp.last_result.out @@ -2273,25 +2408,27 @@ def test_sig_extract_8_picklist_md5_short_alias(runtmp): def test_sig_extract_8_picklist_md5_short_alias_exclude(runtmp): # exclude 47 based on md5prefix8 alias, md5short - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") # select on any of these attributes - row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome', - md5full='09a08691ce52952152f0e866a59f6261', - md5short='09a08691ce5295215', - fullIdent='NC_009665.1', - nodotIdent='NC_009665') + row = dict( + exactName="NC_009665.1 Shewanella baltica OS185, complete genome", + md5full="09a08691ce52952152f0e866a59f6261", + md5short="09a08691ce5295215", + fullIdent="NC_009665.1", + nodotIdent="NC_009665", + ) # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: w = csv.DictWriter(csvfp, fieldnames=row.keys()) w.writeheader() w.writerow(row) picklist_arg = f"{picklist_csv}:md5short:md5short:exclude" - runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg) + runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg) # stdout should be new signature out = runtmp.last_result.out @@ -2304,57 +2441,63 @@ def test_sig_extract_8_picklist_md5_short_alias_exclude(runtmp): def test_sig_extract_8_picklist_md5_short_alias_with_md5_selector_nomatch(runtmp): # extract 47 from 47, using a picklist w/full md5 and also md5 selector - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") # select on any of these attributes - row =
dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome', - md5full='09a08691ce52952152f0e866a59f6261', - md5short='09a08691ce5295215', - fullIdent='NC_009665.1', - nodotIdent='NC_009665') + row = dict( + exactName="NC_009665.1 Shewanella baltica OS185, complete genome", + md5full="09a08691ce52952152f0e866a59f6261", + md5short="09a08691ce5295215", + fullIdent="NC_009665.1", + nodotIdent="NC_009665", + ) # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: w = csv.DictWriter(csvfp, fieldnames=row.keys()) w.writeheader() w.writerow(row) picklist_arg = f"{picklist_csv}:md5short:md5short" with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'extract', sig47, sig63, - '--picklist', picklist_arg, - '--md5', 'XXX') # no match to md5 selector here + runtmp.sourmash( + "sig", "extract", sig47, sig63, "--picklist", picklist_arg, "--md5", "XXX" + ) # no match to md5 selector here err = runtmp.last_result.err assert "no matching signatures to save!" in err -def test_sig_extract_8_picklist_md5_short_alias_with_md5_selector_nomatch_exclude(runtmp): +def test_sig_extract_8_picklist_md5_short_alias_with_md5_selector_nomatch_exclude( + runtmp, +): # exclude 47 using a picklist w/full md5 and also md5 selector - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") # select on any of these attributes - row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome', - md5full='09a08691ce52952152f0e866a59f6261', - md5short='09a08691ce5295215', - fullIdent='NC_009665.1', - nodotIdent='NC_009665') + row = dict( + exactName="NC_009665.1 Shewanella baltica OS185, complete genome", + md5full="09a08691ce52952152f0e866a59f6261", + md5short="09a08691ce5295215", + fullIdent="NC_009665.1", + nodotIdent="NC_009665", + ) # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: w = csv.DictWriter(csvfp, fieldnames=row.keys()) w.writeheader() w.writerow(row) picklist_arg = f"{picklist_csv}:md5short:md5short:exclude" with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'extract', sig47, sig63, - '--picklist', picklist_arg, - '--md5', 'XXX') # no match to md5 selector here + runtmp.sourmash( + "sig", "extract", sig47, sig63, "--picklist", picklist_arg, "--md5", "XXX" + ) # no match to md5 selector here err = runtmp.last_result.err assert "no matching signatures to save!" 
in err @@ -2362,26 +2505,36 @@ def test_sig_extract_8_picklist_md5_short_alias_with_md5_selector_nomatch_exclud def test_sig_extract_8_picklist_md5_short_alias_with_md5_selector(runtmp): # extract 47 from 47, using a picklist w/full md5 and also md5 selector - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") # select on any of these attributes - row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome', - md5full='09a08691ce52952152f0e866a59f6261', - md5short='09a08691ce5295215', - fullIdent='NC_009665.1', - nodotIdent='NC_009665') + row = dict( + exactName="NC_009665.1 Shewanella baltica OS185, complete genome", + md5full="09a08691ce52952152f0e866a59f6261", + md5short="09a08691ce5295215", + fullIdent="NC_009665.1", + nodotIdent="NC_009665", + ) # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: w = csv.DictWriter(csvfp, fieldnames=row.keys()) w.writeheader() w.writerow(row) picklist_arg = f"{picklist_csv}:md5short:md5short" - runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg, - '--md5', '09a08691ce5295215') + runtmp.sourmash( + "sig", + "extract", + sig47, + sig63, + "--picklist", + picklist_arg, + "--md5", + "09a08691ce5295215", + ) # stdout should be new signature out = runtmp.last_result.out @@ -2391,54 +2544,64 @@ def test_sig_extract_8_picklist_md5_short_alias_with_md5_selector(runtmp): assert actual_extract_sig == test_extract_sig + def test_sig_extract_8_picklist_md5_short_alias_with_md5_selector_exclude(runtmp): # exclude 47, using a picklist w/full md5; but try to select with md5 selector - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") # select on any of these attributes - row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome', - md5full='09a08691ce52952152f0e866a59f6261', - md5short='09a08691ce5295215', - fullIdent='NC_009665.1', - nodotIdent='NC_009665') + row = dict( + exactName="NC_009665.1 Shewanella baltica OS185, complete genome", + md5full="09a08691ce52952152f0e866a59f6261", + md5short="09a08691ce5295215", + fullIdent="NC_009665.1", + nodotIdent="NC_009665", + ) # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: w = csv.DictWriter(csvfp, fieldnames=row.keys()) w.writeheader() w.writerow(row) picklist_arg = f"{picklist_csv}:md5short:md5short:exclude" with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', picklist_arg, - '--md5', '09a08691ce5295215') + runtmp.sourmash( + "sig", + "extract", + sig47, + sig63, + "--picklist", + picklist_arg, + "--md5", + "09a08691ce5295215", + ) # NTP: do we want to emit a more informative "conflicting selectors" type of msg? err = runtmp.last_result.err print(err) assert "loaded 1 distinct values into picklist." in err assert "loaded 1 total that matched ksize & molecule type" in err - assert 'no matching signatures to save!' in err + assert "no matching signatures to save!" 
in err def test_sig_extract_8_picklist_md5_nomatch(runtmp): # use an empty picklist => no match - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: - w = csv.DictWriter(csvfp, fieldnames=['md5short']) + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: + w = csv.DictWriter(csvfp, fieldnames=["md5short"]) w.writeheader() picklist_arg = f"{picklist_csv}:md5short:md5prefix8" with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', - picklist_arg) + runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg) # stdout should be new signature out = runtmp.last_result.out @@ -2451,19 +2614,18 @@ def test_sig_extract_8_picklist_md5_nomatch(runtmp): def test_sig_extract_8_picklist_md5_nomatch_exclude(runtmp): # use an empty picklist to exclude => no match => include everything - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: - w = csv.DictWriter(csvfp, fieldnames=['md5short']) + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: + w = csv.DictWriter(csvfp, fieldnames=["md5short"]) w.writeheader() picklist_arg = f"{picklist_csv}:md5short:md5prefix8:exclude" - runtmp.sourmash('sig', 'extract', sig47, sig63, '--picklist', - picklist_arg) + runtmp.sourmash("sig", "extract", sig47, sig63, "--picklist", picklist_arg) # stdout should be both signatures out = runtmp.last_result.out @@ -2478,91 +2640,94 @@ def test_sig_extract_8_picklist_md5_nomatch_exclude(runtmp): err = runtmp.last_result.err print(err) assert runtmp.last_result.status == 0 - assert 'loaded 0 distinct values into picklist.' in err - assert 'loaded 2 total that matched ksize & molecule type' in err - assert 'extracted 2 signatures from 2 file(s)' in err - assert 'for given picklist, found 2 matches by excluding 0 distinct values' in err + assert "loaded 0 distinct values into picklist." 
in err + assert "loaded 2 total that matched ksize & molecule type" in err + assert "extracted 2 signatures from 2 file(s)" in err + assert "for given picklist, found 2 matches by excluding 0 distinct values" in err def test_sig_extract_9_picklist_md5_ksize_hp_select(runtmp): # test with -k and moltype selector - sigdir = utils.get_test_data('prot/') + sigdir = utils.get_test_data("prot/") # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: - w = csv.DictWriter(csvfp, fieldnames=['md5']) + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: + w = csv.DictWriter(csvfp, fieldnames=["md5"]) w.writeheader() - w.writerow(dict(md5='ea2a1ad233c2908529d124a330bcb672')) + w.writerow(dict(md5="ea2a1ad233c2908529d124a330bcb672")) picklist_arg = f"{picklist_csv}:md5:md5" - runtmp.sourmash('sig', 'extract', sigdir, '--picklist', - picklist_arg, '-k', '19', '--hp') + runtmp.sourmash( + "sig", "extract", sigdir, "--picklist", picklist_arg, "-k", "19", "--hp" + ) # stdout should be new signature out = runtmp.last_result.out actual_extract_sig = sourmash.load_one_signature(out) print(actual_extract_sig.md5sum) - assert str(actual_extract_sig) == 'GCA_001593925' - assert actual_extract_sig.md5sum() == 'ea2a1ad233c2908529d124a330bcb672' + assert str(actual_extract_sig) == "GCA_001593925" + assert actual_extract_sig.md5sum() == "ea2a1ad233c2908529d124a330bcb672" assert actual_extract_sig.minhash.ksize == 19 - assert actual_extract_sig.minhash.moltype == 'hp' + assert actual_extract_sig.minhash.moltype == "hp" def test_sig_extract_9_picklist_md5_ksize_hp_select_exclude(runtmp): # test picklist exclude with -k and moltype selector - sigdir = utils.get_test_data('prot/') + sigdir = utils.get_test_data("prot/") # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: - w = csv.DictWriter(csvfp, fieldnames=['md5']) + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: + w = csv.DictWriter(csvfp, fieldnames=["md5"]) w.writeheader() - w.writerow(dict(md5='ea2a1ad233c2908529d124a330bcb672')) + w.writerow(dict(md5="ea2a1ad233c2908529d124a330bcb672")) picklist_arg = f"{picklist_csv}:md5:md5:exclude" - runtmp.sourmash('sig', 'extract', sigdir, '--picklist', - picklist_arg, '-k', '19', '--hp') + runtmp.sourmash( + "sig", "extract", sigdir, "--picklist", picklist_arg, "-k", "19", "--hp" + ) # stdout should be new signature out = runtmp.last_result.out actual_extract_sig = sourmash.load_one_signature(out) print(actual_extract_sig.md5sum) - assert str(actual_extract_sig) == 'GCA_001593935' - assert actual_extract_sig.md5sum() == 'bb0e6d90df01b7bd5d0956a5f9e3ed12' + assert str(actual_extract_sig) == "GCA_001593935" + assert actual_extract_sig.md5sum() == "bb0e6d90df01b7bd5d0956a5f9e3ed12" assert actual_extract_sig.minhash.ksize == 19 - assert actual_extract_sig.minhash.moltype == 'hp' + assert actual_extract_sig.minhash.moltype == "hp" def test_sig_extract_10_picklist_md5_dups_and_empty(runtmp): # test empty picklist values, and duplicate picklist values - sigdir = utils.get_test_data('prot/') + sigdir = utils.get_test_data("prot/") # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: - w = csv.DictWriter(csvfp, fieldnames=['md5']) + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: + w = csv.DictWriter(csvfp, 
fieldnames=["md5"]) w.writeheader() - w.writerow(dict(md5='ea2a1ad233c2908529d124a330bcb672')) - w.writerow(dict(md5='ea2a1ad233c2908529d124a330bcb672')) - w.writerow(dict(md5='')) + w.writerow(dict(md5="ea2a1ad233c2908529d124a330bcb672")) + w.writerow(dict(md5="ea2a1ad233c2908529d124a330bcb672")) + w.writerow(dict(md5="")) picklist_arg = f"{picklist_csv}:md5:md5" - runtmp.sourmash('sig', 'extract', sigdir, '--picklist', - picklist_arg, '-k', '19', '--hp') + runtmp.sourmash( + "sig", "extract", sigdir, "--picklist", picklist_arg, "-k", "19", "--hp" + ) # stdout should be new signature out = runtmp.last_result.out actual_extract_sig = sourmash.load_one_signature(out) assert actual_extract_sig.minhash.ksize == 19 - assert actual_extract_sig.minhash.moltype == 'hp' - assert actual_extract_sig.md5sum() == 'ea2a1ad233c2908529d124a330bcb672' + assert actual_extract_sig.minhash.moltype == "hp" + assert actual_extract_sig.md5sum() == "ea2a1ad233c2908529d124a330bcb672" err = runtmp.last_result.err print(err) @@ -2573,29 +2738,30 @@ def test_sig_extract_10_picklist_md5_dups_and_empty(runtmp): def test_sig_extract_10_picklist_md5_dups_and_empty_exclude(runtmp): # test empty picklist values, and duplicate picklist values for exclude - sigdir = utils.get_test_data('prot/') + sigdir = utils.get_test_data("prot/") # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: - w = csv.DictWriter(csvfp, fieldnames=['md5']) + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: + w = csv.DictWriter(csvfp, fieldnames=["md5"]) w.writeheader() - w.writerow(dict(md5='ea2a1ad233c2908529d124a330bcb672')) - w.writerow(dict(md5='ea2a1ad233c2908529d124a330bcb672')) - w.writerow(dict(md5='')) + w.writerow(dict(md5="ea2a1ad233c2908529d124a330bcb672")) + w.writerow(dict(md5="ea2a1ad233c2908529d124a330bcb672")) + w.writerow(dict(md5="")) picklist_arg = f"{picklist_csv}:md5:md5:exclude" - runtmp.sourmash('sig', 'extract', sigdir, '--picklist', - picklist_arg, '-k', '19', '--hp') + runtmp.sourmash( + "sig", "extract", sigdir, "--picklist", picklist_arg, "-k", "19", "--hp" + ) # stdout should be new signature out = runtmp.last_result.out actual_extract_sig = sourmash.load_one_signature(out) assert actual_extract_sig.minhash.ksize == 19 - assert actual_extract_sig.minhash.moltype == 'hp' - assert actual_extract_sig.md5sum() == 'bb0e6d90df01b7bd5d0956a5f9e3ed12' + assert actual_extract_sig.minhash.moltype == "hp" + assert actual_extract_sig.md5sum() == "bb0e6d90df01b7bd5d0956a5f9e3ed12" err = runtmp.last_result.err print(err) @@ -2606,20 +2772,21 @@ def test_sig_extract_10_picklist_md5_dups_and_empty_exclude(runtmp): def test_sig_extract_11_picklist_bad_coltype(runtmp): # test with invalid picklist coltype - sigdir = utils.get_test_data('prot/') + sigdir = utils.get_test_data("prot/") # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: - w = csv.DictWriter(csvfp, fieldnames=['md5']) + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: + w = csv.DictWriter(csvfp, fieldnames=["md5"]) w.writeheader() - w.writerow(dict(md5='ea2a1ad233c2908529d124a330bcb672')) + w.writerow(dict(md5="ea2a1ad233c2908529d124a330bcb672")) picklist_arg = f"{picklist_csv}:md5:BADCOLTYPE" with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'extract', sigdir, '--picklist', - picklist_arg, '-k', '19', '--hp') + runtmp.sourmash( + "sig", "extract", 
sigdir, "--picklist", picklist_arg, "-k", "19", "--hp" + ) err = runtmp.last_result.err print(err) @@ -2628,20 +2795,21 @@ def test_sig_extract_11_picklist_bad_coltype(runtmp): def test_sig_extract_11_picklist_bad_coltype_exclude(runtmp): # test with invalid picklist coltype - sigdir = utils.get_test_data('prot/') + sigdir = utils.get_test_data("prot/") # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: - w = csv.DictWriter(csvfp, fieldnames=['md5']) + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: + w = csv.DictWriter(csvfp, fieldnames=["md5"]) w.writeheader() - w.writerow(dict(md5='ea2a1ad233c2908529d124a330bcb672')) + w.writerow(dict(md5="ea2a1ad233c2908529d124a330bcb672")) picklist_arg = f"{picklist_csv}:md5:BADCOLTYPE:exclude" with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'extract', sigdir, '--picklist', - picklist_arg, '-k', '19', '--hp') + runtmp.sourmash( + "sig", "extract", sigdir, "--picklist", picklist_arg, "-k", "19", "--hp" + ) err = runtmp.last_result.err print(err) @@ -2650,20 +2818,21 @@ def test_sig_extract_11_picklist_bad_coltype_exclude(runtmp): def test_sig_extract_12_picklist_bad_argstr(runtmp): # test with invalid argument format to --picklist - sigdir = utils.get_test_data('prot/') + sigdir = utils.get_test_data("prot/") # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: - w = csv.DictWriter(csvfp, fieldnames=['md5']) + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: + w = csv.DictWriter(csvfp, fieldnames=["md5"]) w.writeheader() - w.writerow(dict(md5='ea2a1ad233c2908529d124a330bcb672')) + w.writerow(dict(md5="ea2a1ad233c2908529d124a330bcb672")) picklist_arg = f"{picklist_csv}" with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'extract', sigdir, '--picklist', - picklist_arg, '-k', '19', '--hp') + runtmp.sourmash( + "sig", "extract", sigdir, "--picklist", picklist_arg, "-k", "19", "--hp" + ) err = runtmp.last_result.err print(err) @@ -2672,42 +2841,47 @@ def test_sig_extract_12_picklist_bad_argstr(runtmp): def test_sig_extract_12_picklist_bad_pickstyle(runtmp): # test with invalid argument format to --picklist - sigdir = utils.get_test_data('prot/') + sigdir = utils.get_test_data("prot/") # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: - w = csv.DictWriter(csvfp, fieldnames=['md5']) + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: + w = csv.DictWriter(csvfp, fieldnames=["md5"]) w.writeheader() - w.writerow(dict(md5='ea2a1ad233c2908529d124a330bcb672')) + w.writerow(dict(md5="ea2a1ad233c2908529d124a330bcb672")) picklist_arg = f"{picklist_csv}:md5:md5:XXX" with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'extract', sigdir, '--picklist', - picklist_arg, '-k', '19', '--hp') + runtmp.sourmash( + "sig", "extract", sigdir, "--picklist", picklist_arg, "-k", "19", "--hp" + ) err = runtmp.last_result.err print(err) - assert "invalid picklist 'pickstyle' argument 4: 'XXX' must be 'include' or 'exclude'" in err + assert ( + "invalid picklist 'pickstyle' argument 4: 'XXX' must be 'include' or 'exclude'" + in err + ) def test_sig_extract_12_picklist_bad_colname(runtmp): # test with invalid picklist colname - sigdir = utils.get_test_data('prot/') + sigdir = utils.get_test_data("prot/") # make picklist - 
picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: - w = csv.DictWriter(csvfp, fieldnames=['md5']) + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: + w = csv.DictWriter(csvfp, fieldnames=["md5"]) w.writeheader() - w.writerow(dict(md5='ea2a1ad233c2908529d124a330bcb672')) + w.writerow(dict(md5="ea2a1ad233c2908529d124a330bcb672")) picklist_arg = f"{picklist_csv}:BADCOLNAME:md5" with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'extract', sigdir, '--picklist', - picklist_arg, '-k', '19', '--hp') + runtmp.sourmash( + "sig", "extract", sigdir, "--picklist", picklist_arg, "-k", "19", "--hp" + ) err = runtmp.last_result.err print(err) @@ -2716,20 +2890,21 @@ def test_sig_extract_12_picklist_bad_colname(runtmp): def test_sig_extract_12_picklist_bad_colname_exclude(runtmp): # test with invalid picklist colname - sigdir = utils.get_test_data('prot/') + sigdir = utils.get_test_data("prot/") # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: - w = csv.DictWriter(csvfp, fieldnames=['md5']) + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: + w = csv.DictWriter(csvfp, fieldnames=["md5"]) w.writeheader() - w.writerow(dict(md5='ea2a1ad233c2908529d124a330bcb672')) + w.writerow(dict(md5="ea2a1ad233c2908529d124a330bcb672")) picklist_arg = f"{picklist_csv}:BADCOLNAME:md5:exclude" with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'extract', sigdir, '--picklist', - picklist_arg, '-k', '19', '--hp') + runtmp.sourmash( + "sig", "extract", sigdir, "--picklist", picklist_arg, "-k", "19", "--hp" + ) err = runtmp.last_result.err print(err) @@ -2738,45 +2913,47 @@ def test_sig_extract_12_picklist_bad_colname_exclude(runtmp): def test_sig_extract_11_pattern_include(runtmp): # test --include-db-pattern - sigfiles = glob.glob(utils.get_test_data('prot/*.zip')) + sigfiles = glob.glob(utils.get_test_data("prot/*.zip")) - runtmp.sourmash('sig', 'extract', '--include', 'shewanella', *sigfiles, - '-o', 'out.zip') + runtmp.sourmash( + "sig", "extract", "--include", "shewanella", *sigfiles, "-o", "out.zip" + ) - idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + idx = sourmash.load_file_as_index(runtmp.output("out.zip")) assert len(idx) == 2 - names = [ ss.name for ss in idx.signatures() ] + names = [ss.name for ss in idx.signatures()] for n in names: - assert 'shewanella' in n.lower(), n + assert "shewanella" in n.lower(), n def test_sig_extract_11_pattern_exclude(runtmp): # test --exclude-db-pattern - sigfiles = glob.glob(utils.get_test_data('prot/*.zip')) + sigfiles = glob.glob(utils.get_test_data("prot/*.zip")) - runtmp.sourmash('sig', 'extract', '--exclude', 'shewanella', *sigfiles, - '-o', 'out.zip') + runtmp.sourmash( + "sig", "extract", "--exclude", "shewanella", *sigfiles, "-o", "out.zip" + ) - idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + idx = sourmash.load_file_as_index(runtmp.output("out.zip")) assert len(idx) == 18 - names = [ ss.name for ss in idx.signatures() ] + names = [ss.name for ss in idx.signatures()] for n in names: - assert 'shewanella' not in n.lower(), n + assert "shewanella" not in n.lower(), n def test_sig_extract_identical_md5s(runtmp): # test that we properly handle different signatures with identical md5s - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss = load_signatures(sig47) sig = list(ss)[0] new_sig = 
sig.to_mutable() - new_sig.name = 'foo' - sig47foo = runtmp.output('foo.sig') + new_sig.name = "foo" + sig47foo = runtmp.output("foo.sig") # this was only a problem when the signatures are stored in the same file - with open(sig47foo, 'wt') as fp: + with open(sig47foo, "w") as fp: sourmash.save_signatures([new_sig, sig], fp) - runtmp.run_sourmash('sig', 'extract', '--name', 'foo', sig47foo) + runtmp.run_sourmash("sig", "extract", "--name", "foo", sig47foo) out = runtmp.last_result.out print(out) @@ -2784,18 +2961,18 @@ def test_sig_extract_identical_md5s(runtmp): ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella' not in ss.name - assert 'foo' in ss.name - assert ss.md5sum() == '09a08691ce52952152f0e866a59f6261' + assert "Shewanella" not in ss.name + assert "foo" in ss.name + assert ss.md5sum() == "09a08691ce52952152f0e866a59f6261" def test_sig_flatten_1(runtmp): c = runtmp # extract matches to several names from among several signatures & flatten - sig47abund = utils.get_test_data('track_abund/47.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - c.run_sourmash('sig', 'flatten', sig47abund, '--name', 'Shewanella') + sig47abund = utils.get_test_data("track_abund/47.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + c.run_sourmash("sig", "flatten", sig47abund, "--name", "Shewanella") # stdout should be new signature out = c.last_result.out @@ -2813,14 +2990,20 @@ def test_sig_flatten_1_from_file(runtmp): c = runtmp # extract matches to several names from among several signatures & flatten - sig47abund = utils.get_test_data('track_abund/47.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') + sig47abund = utils.get_test_data("track_abund/47.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") - from_file = _write_file(runtmp, 'list.txt', [sig47abund]) - picklist = _write_file(runtmp, 'pl.csv', ['md5short', '09a08691']) + from_file = _write_file(runtmp, "list.txt", [sig47abund]) + picklist = _write_file(runtmp, "pl.csv", ["md5short", "09a08691"]) - c.run_sourmash('sig', 'flatten', '--from-file', from_file, - '--picklist', f'{picklist}:md5short:md5short') + c.run_sourmash( + "sig", + "flatten", + "--from-file", + from_file, + "--picklist", + f"{picklist}:md5short:md5short", + ) # stdout should be new signature out = c.last_result.out @@ -2837,10 +3020,10 @@ def test_sig_flatten_1_from_file(runtmp): @utils.in_tempdir def test_sig_flatten_1_select_name(c): # extract matches to several names from among several signatures & flatten - sig47abund = utils.get_test_data('track_abund/47.fa.sig') - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - c.run_sourmash('sig', 'flatten', sig2, sig47abund, '--name', 'Shewanella') + sig47abund = utils.get_test_data("track_abund/47.fa.sig") + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + c.run_sourmash("sig", "flatten", sig2, sig47abund, "--name", "Shewanella") # stdout should be new signature out = c.last_result.out @@ -2858,10 +3041,10 @@ def test_sig_flatten_1_select_md5(runtmp): c = runtmp # extract matches to several names from among several signatures & flatten - sig47abund = utils.get_test_data('track_abund/47.fa.sig') - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - c.run_sourmash('sig', 'flatten', sig2, sig47abund, '--md5', '09a08691c') + sig47abund = utils.get_test_data("track_abund/47.fa.sig") + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + c.run_sourmash("sig", "flatten", sig2, 
sig47abund, "--md5", "09a08691c") # stdout should be new signature out = c.last_result.out @@ -2878,8 +3061,8 @@ def test_sig_flatten_1_select_md5(runtmp): def test_sig_flatten_2_ksize(runtmp): c = runtmp # flatten only one signature selected using ksize - psw_mag = utils.get_test_data('lca/TARA_PSW_MAG_00136.sig') - c.run_sourmash('sig', 'flatten', psw_mag, '-k', '31') + psw_mag = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig") + c.run_sourmash("sig", "flatten", psw_mag, "-k", "31") # stdout should be new signature out = c.last_result.out @@ -2893,8 +3076,8 @@ def test_sig_flatten_2_ksize(runtmp): @utils.in_tempdir def test_sig_downsample_1_scaled(c): # downsample a scaled signature - sig47 = utils.get_test_data('47.fa.sig') - c.run_sourmash('sig', 'downsample', '--scaled', '10000', sig47) + sig47 = utils.get_test_data("47.fa.sig") + c.run_sourmash("sig", "downsample", "--scaled", "10000", sig47) # stdout should be new signature out = c.last_result.out @@ -2910,8 +3093,8 @@ def test_sig_downsample_1_scaled(c): @utils.in_tempdir def test_sig_downsample_1_scaled_downsample_multisig(c): # downsample many scaled signatures in one file - multisig = utils.get_test_data('47+63-multisig.sig') - c.run_sourmash('sig', 'downsample', '--scaled', '10000', multisig) + multisig = utils.get_test_data("47+63-multisig.sig") + c.run_sourmash("sig", "downsample", "--scaled", "10000", multisig) # stdout should be new signatures out = c.last_result.out @@ -2923,8 +3106,8 @@ def test_sig_downsample_1_scaled_downsample_multisig(c): @utils.in_tempdir def test_sig_downsample_1_scaled_to_num(c): # downsample a scaled signature - sig47 = utils.get_test_data('47.fa.sig') - c.run_sourmash('sig', 'downsample', '--num', '500', sig47) + sig47 = utils.get_test_data("47.fa.sig") + c.run_sourmash("sig", "downsample", "--num", "500", sig47) # stdout should be new signature out = c.last_result.out @@ -2938,70 +3121,72 @@ def test_sig_downsample_1_scaled_to_num(c): test_mins = test_downsample_sig.minhash.hashes.keys() test_mins = list(test_mins) test_mins.sort() - test_mins = test_mins[:500] # take 500 smallest + test_mins = test_mins[:500] # take 500 smallest assert actual_mins == test_mins def test_sig_downsample_check_num_bounds_negative(runtmp): - c=runtmp - sig47 = utils.get_test_data('47.fa.sig') + c = runtmp + sig47 = utils.get_test_data("47.fa.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'downsample', '--num', '-5', sig47) + c.run_sourmash("sig", "downsample", "--num", "-5", sig47) assert "ERROR: num value must be positive" in c.last_result.err def test_sig_downsample_check_num_bounds_less_than_minimum(runtmp): - c=runtmp - sig47 = utils.get_test_data('47.fa.sig') + c = runtmp + sig47 = utils.get_test_data("47.fa.sig") - c.run_sourmash('sig', 'downsample', '--num', '25', sig47) + c.run_sourmash("sig", "downsample", "--num", "25", sig47) assert "WARNING: num value should be >= 50. Continuing anyway." in c.last_result.err def test_sig_downsample_check_num_bounds_more_than_maximum(runtmp): - c=runtmp - sig47 = utils.get_test_data('47.fa.sig') + c = runtmp + sig47 = utils.get_test_data("47.fa.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'downsample', '--num', '100000', sig47) + c.run_sourmash("sig", "downsample", "--num", "100000", sig47) - assert "WARNING: num value should be <= 50000. Continuing anyway." in c.last_result.err + assert ( + "WARNING: num value should be <= 50000. Continuing anyway." 
in c.last_result.err + ) @utils.in_tempdir def test_sig_downsample_1_scaled_to_num_fail(c): # downsample a scaled signature - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'downsample', '--num', '50000', sig47) + c.run_sourmash("sig", "downsample", "--num", "50000", sig47) @utils.in_tempdir def test_sig_downsample_1_scaled_empty(c): # downsample a scaled signature - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'downsample', sig47) + c.run_sourmash("sig", "downsample", sig47) @utils.in_tempdir def test_sig_downsample_2_num(c): # downsample a num signature - sigs11 = utils.get_test_data('genome-s11.fa.gz.sig') - c.run_sourmash('sig', 'downsample', '--num', '500', - '-k', '21', '--dna', sigs11) + sigs11 = utils.get_test_data("genome-s11.fa.gz.sig") + c.run_sourmash("sig", "downsample", "--num", "500", "-k", "21", "--dna", sigs11) # stdout should be new signature out = c.last_result.out - test_downsample_sig = sourmash.load_one_signature(sigs11, ksize=21, - select_moltype='DNA') + test_downsample_sig = sourmash.load_one_signature( + sigs11, ksize=21, select_moltype="DNA" + ) actual_downsample_sig = sourmash.load_one_signature(out) test_mh = test_downsample_sig.minhash.downsample(num=500) @@ -3011,15 +3196,17 @@ def test_sig_downsample_2_num(c): @utils.in_tempdir def test_sig_downsample_2_num_to_scaled(c): # downsample a num signature and convert it into a scaled sig - sigs11 = utils.get_test_data('genome-s11.fa.gz.sig') - c.run_sourmash('sig', 'downsample', '--scaled', '10000', - '-k', '21', '--dna', sigs11) + sigs11 = utils.get_test_data("genome-s11.fa.gz.sig") + c.run_sourmash( + "sig", "downsample", "--scaled", "10000", "-k", "21", "--dna", sigs11 + ) # stdout should be new signature out = c.last_result.out - test_downsample_sig = sourmash.load_one_signature(sigs11, ksize=21, - select_moltype='DNA') + test_downsample_sig = sourmash.load_one_signature( + sigs11, ksize=21, select_moltype="DNA" + ) actual_downsample_sig = sourmash.load_one_signature(out) test_mins = test_downsample_sig.minhash.hashes.keys() @@ -3027,7 +3214,7 @@ def test_sig_downsample_2_num_to_scaled(c): # select those mins that are beneath the new max hash... 
max_hash = actual_downsample_sig.minhash._max_hash - test_mins_down = { k for k in test_mins if k < max_hash } + test_mins_down = {k for k in test_mins if k < max_hash} assert test_mins_down == set(actual_mins) @@ -3035,38 +3222,49 @@ def test_sig_downsample_2_num_to_scaled(c): def test_sig_downsample_2_num_to_scaled_fail(c): # downsample a num signature and FAIL to convert it into a scaled sig # because new scaled is too low - sigs11 = utils.get_test_data('genome-s11.fa.gz.sig') + sigs11 = utils.get_test_data("genome-s11.fa.gz.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'downsample', '--scaled', '100', - '-k', '21', '--dna', sigs11) + c.run_sourmash( + "sig", "downsample", "--scaled", "100", "-k", "21", "--dna", sigs11 + ) @utils.in_tempdir def test_sig_downsample_2_num_and_scaled_both_fail(c): # cannot specify both --num and --scaled - sigs11 = utils.get_test_data('genome-s11.fa.gz.sig') + sigs11 = utils.get_test_data("genome-s11.fa.gz.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'downsample', '--scaled', '100', '--num', '50', - '-k', '21', '--dna', sigs11) + c.run_sourmash( + "sig", + "downsample", + "--scaled", + "100", + "--num", + "50", + "-k", + "21", + "--dna", + sigs11, + ) @utils.in_tempdir def test_sig_downsample_2_num_empty(c): # downsample a num signature - sigs11 = utils.get_test_data('genome-s11.fa.gz.sig') + sigs11 = utils.get_test_data("genome-s11.fa.gz.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sig', 'downsample', '-k', '21', '--dna', sigs11) + c.run_sourmash("sig", "downsample", "-k", "21", "--dna", sigs11) def test_sig_describe_1(runtmp): c = runtmp # get basic info on a signature - sig47 = utils.get_test_data('47.fa.sig') - c.run_sourmash('sig', 'describe', sig47) + sig47 = utils.get_test_data("47.fa.sig") + c.run_sourmash("sig", "describe", sig47) out = c.last_result.out print(c.last_result) @@ -3087,12 +3285,18 @@ def test_sig_describe_1_fromfile_picklist(runtmp): c = runtmp # get basic info on a signature - sig47 = utils.get_test_data('47.fa.sig') - from_file = _write_file(runtmp, 'list.txt', [sig47]) - picklist = _write_file(runtmp, 'pl.csv', ['md5short', '09a08691']) - - c.run_sourmash('sig', 'describe', '--from-file', from_file, - '--picklist', f'{picklist}:md5short:md5short') + sig47 = utils.get_test_data("47.fa.sig") + from_file = _write_file(runtmp, "list.txt", [sig47]) + picklist = _write_file(runtmp, "pl.csv", ["md5short", "09a08691"]) + + c.run_sourmash( + "sig", + "describe", + "--from-file", + from_file, + "--picklist", + f"{picklist}:md5short:md5short", + ) out = c.last_result.out print(c.last_result) @@ -3112,41 +3316,55 @@ def test_sig_describe_1_fromfile_picklist(runtmp): @utils.in_thisdir def test_sig_describe_protein(c): # test describe on a singleton protein signature - testdata = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - c.run_sourmash('sig', 'describe', testdata) + testdata = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + c.run_sourmash("sig", "describe", testdata) - assert 'k=19 molecule=protein num=0 scaled=100 seed=42 track_abundance=0' in c.last_result.out + assert ( + "k=19 molecule=protein num=0 scaled=100 seed=42 track_abundance=0" + in c.last_result.out + ) @utils.in_thisdir def test_sig_describe_hp(c): # test describe on a singleton hp signature - testdata = utils.get_test_data('prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - c.run_sourmash('sig', 'describe', 
testdata) + testdata = utils.get_test_data( + "prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + c.run_sourmash("sig", "describe", testdata) - assert 'k=19 molecule=hp num=0 scaled=100 seed=42 track_abundance=0' in c.last_result.out + assert ( + "k=19 molecule=hp num=0 scaled=100 seed=42 track_abundance=0" + in c.last_result.out + ) @utils.in_thisdir def test_sig_describe_dayhoff(c): # test describe on a singleton dayhoff signature - testdata = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - c.run_sourmash('sig', 'describe', testdata) + testdata = utils.get_test_data( + "prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + c.run_sourmash("sig", "describe", testdata) - assert 'k=19 molecule=dayhoff num=0 scaled=100 seed=42 track_abundance=0' in c.last_result.out + assert ( + "k=19 molecule=dayhoff num=0 scaled=100 seed=42 track_abundance=0" + in c.last_result.out + ) @utils.in_tempdir def test_sig_describe_1_hp(c): # get basic info on a signature - testdata = utils.get_test_data('short.fa') - c.run_sourmash('compute', '-k', '21,30', - '--dayhoff', '--hp', '--protein', - '--dna', - testdata) + testdata = utils.get_test_data("short.fa") + c.run_sourmash( + "compute", "-k", "21,30", "--dayhoff", "--hp", "--protein", "--dna", testdata + ) # stdout should be new signature - computed_sig = os.path.join(c.location, 'short.fa.sig') - c.run_sourmash('sig', 'describe', computed_sig) + computed_sig = os.path.join(c.location, "short.fa.sig") + c.run_sourmash("sig", "describe", computed_sig) out = c.last_result.out print(c.last_result.out) @@ -3237,16 +3455,15 @@ def test_sig_describe_1_hp(c): """.splitlines() for line in out.splitlines(): - cleaned_line = line.strip().replace( - testdata_dirname, '').replace(location, '') + cleaned_line = line.strip().replace(testdata_dirname, "").replace(location, "") assert cleaned_line in expected_output, cleaned_line @utils.in_tempdir def test_sig_describe_1_multisig(c): # get basic info on multiple signatures in a single file - sigs = utils.get_test_data('47+63-multisig.sig') - c.run_sourmash('sig', 'describe', sigs) + sigs = utils.get_test_data("47+63-multisig.sig") + c.run_sourmash("sig", "describe", sigs) out = c.last_result.out print(c.last_result) @@ -3265,8 +3482,8 @@ def test_sig_describe_1_multisig(c): @utils.in_tempdir def test_sig_describe_1_sbt(c): # get basic info on multiple signatures in an SBT - sigs = utils.get_test_data('prot/protein.sbt.zip') - c.run_sourmash('sig', 'describe', sigs) + sigs = utils.get_test_data("prot/protein.sbt.zip") + c.run_sourmash("sig", "describe", sigs) out = c.last_result.out print(c.last_result) @@ -3282,8 +3499,8 @@ def test_sig_describe_1_sbt(c): @utils.in_tempdir def test_sig_describe_1_lca(c): # get basic info on multiple signatures in an LCA database - sigs = utils.get_test_data('prot/protein.lca.json.gz') - c.run_sourmash('sig', 'describe', sigs) + sigs = utils.get_test_data("prot/protein.lca.json.gz") + c.run_sourmash("sig", "describe", sigs) out = c.last_result.out print(c.last_result) @@ -3299,8 +3516,8 @@ def test_sig_describe_1_lca(c): @utils.in_tempdir def test_sig_describe_1_dir(c): # get basic info on multiple signatures in a directory - sigs = utils.get_test_data('prot/protein/') - c.run_sourmash('sig', 'describe', sigs) + sigs = utils.get_test_data("prot/protein/") + c.run_sourmash("sig", "describe", sigs) out = c.last_result.out print(c.last_result) @@ -3320,8 +3537,8 @@ def test_sig_describe_1_dir(c): @utils.in_tempdir def 
test_sig_describe_1_zipfile(c): # get basic info on multiple signatures in a zipfile - sigs = utils.get_test_data('prot/all.zip') - c.run_sourmash('sig', 'describe', sigs) + sigs = utils.get_test_data("prot/all.zip") + c.run_sourmash("sig", "describe", sigs) out = c.last_result.out print(c.last_result) @@ -3342,8 +3559,8 @@ def test_sig_describe_1_sig_abund(runtmp): # check output of sig describe on a sketch with abundances c = runtmp - sigfile = utils.get_test_data('track_abund/47.fa.sig') - c.run_sourmash('sig', 'describe', sigfile) + sigfile = utils.get_test_data("track_abund/47.fa.sig") + c.run_sourmash("sig", "describe", sigfile) out = c.last_result.out print(c.last_result.out) @@ -3363,18 +3580,22 @@ def test_sig_describe_1_sig_abund(runtmp): @utils.in_thisdir def test_sig_describe_stdin(c): - sig = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - with open(sig, 'rt') as fp: + sig = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + with open(sig) as fp: data = fp.read() - c.run_sourmash('sig', 'describe', '-', stdin_data=data) + c.run_sourmash("sig", "describe", "-", stdin_data=data) - assert 'signature: GCA_001593925' in c.last_result.out + assert "signature: GCA_001593925" in c.last_result.out @utils.in_tempdir def test_sig_describe_empty(c): - sig = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') + sig = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) ss = sourmash.load_file_as_signatures(sig) ss = list(ss) @@ -3382,34 +3603,34 @@ def test_sig_describe_empty(c): ss = ss[0] ss = ss.to_mutable() - ss.name = '' - ss.filename = '' + ss.name = "" + ss.filename = "" - outsig = c.output('xxx.sig') - with open(outsig, 'wt') as fp: + outsig = c.output("xxx.sig") + with open(outsig, "w") as fp: sourmash.save_signatures([ss], fp) ss = sourmash.load_file_as_signatures(outsig) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert ss.name == '' - assert ss.filename == '' + assert ss.name == "" + assert ss.filename == "" - c.run_sourmash('sig', 'describe', outsig) + c.run_sourmash("sig", "describe", outsig) print(c.last_result.out) - assert 'signature: ** no name **' in c.last_result.out - assert 'source file: ** no name **' in c.last_result.out + assert "signature: ** no name **" in c.last_result.out + assert "source file: ** no name **" in c.last_result.out def test_sig_describe_sqldb(runtmp): # make a sqldb and run fileinfo on it - gcf_all = glob.glob(utils.get_test_data('gather/GCF*.sig')) - sqldb = runtmp.output('some.sqldb') + gcf_all = glob.glob(utils.get_test_data("gather/GCF*.sig")) + sqldb = runtmp.output("some.sqldb") - runtmp.sourmash('sig', 'cat', '-k', '31', *gcf_all, '-o', sqldb) + runtmp.sourmash("sig", "cat", "-k", "31", *gcf_all, "-o", sqldb) - runtmp.sourmash('sig', 'describe', sqldb) + runtmp.sourmash("sig", "describe", sqldb) err = runtmp.last_result.err print(err) @@ -3417,28 +3638,30 @@ def test_sig_describe_sqldb(runtmp): out = runtmp.last_result.out print(out) - assert 'md5: 4289d4241be8573145282352215ca3c4' in out - assert 'md5: 85c3aeec6457c0b1d210472ddeb67714' in out + assert "md5: 4289d4241be8573145282352215ca3c4" in out + assert "md5: 85c3aeec6457c0b1d210472ddeb67714" in out def test_sig_describe_2_csv(runtmp): # output info in CSV spreadsheet c = runtmp - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - c.run_sourmash('sig', 'describe', sig47, sig63, '--csv', 'out.csv') + sig47 
= utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + c.run_sourmash("sig", "describe", sig47, sig63, "--csv", "out.csv") - expected_md5 = ['09a08691ce52952152f0e866a59f6261', - '38729c6374925585db28916b82a6f513'] + expected_md5 = [ + "09a08691ce52952152f0e866a59f6261", + "38729c6374925585db28916b82a6f513", + ] - with open(c.output('out.csv'), 'rt') as fp: + with open(c.output("out.csv")) as fp: r = csv.DictReader(fp) n = 0 for row, md5 in zip(r, expected_md5): - assert row['md5'] == md5 + assert row["md5"] == md5 n += 1 assert n == 2 @@ -3448,20 +3671,22 @@ def test_sig_describe_2_csv_gz(runtmp): # output info in CSV spreadsheet, gzipped c = runtmp - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - c.run_sourmash('sig', 'describe', sig47, sig63, '--csv', 'out.csv.gz') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + c.run_sourmash("sig", "describe", sig47, sig63, "--csv", "out.csv.gz") - expected_md5 = ['09a08691ce52952152f0e866a59f6261', - '38729c6374925585db28916b82a6f513'] + expected_md5 = [ + "09a08691ce52952152f0e866a59f6261", + "38729c6374925585db28916b82a6f513", + ] - with gzip.open(c.output('out.csv.gz'), 'rt', newline="") as fp: + with gzip.open(c.output("out.csv.gz"), "rt", newline="") as fp: r = csv.DictReader(fp) n = 0 for row, md5 in zip(r, expected_md5): - assert row['md5'] == md5 + assert row["md5"] == md5 n += 1 assert n == 2 @@ -3471,31 +3696,29 @@ def test_sig_describe_2_csv_abund(runtmp): # output info in CSV spreadsheet, for abund sig c = runtmp - sig47 = utils.get_test_data('track_abund/47.fa.sig') - c.run_sourmash('sig', 'describe', sig47, '--csv', 'out.csv') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + c.run_sourmash("sig", "describe", sig47, "--csv", "out.csv") - with open(c.output('out.csv'), 'rt') as fp: + with open(c.output("out.csv")) as fp: r = csv.DictReader(fp) - n = 0 - rows = list(r) assert len(rows) == 1 row = rows[0] - assert row['signature_file'] == sig47 - assert row['md5'] == "09a08691ce52952152f0e866a59f6261" - assert row['ksize'] == "31" - assert row['moltype'] == "DNA" - assert row['num'] == "0" - assert row['scaled'] == "1000" - assert row['n_hashes'] == "5177" - assert row['seed'] == "42" - assert row['with_abundance'] == "1" - assert row['name'] == "NC_009665.1 Shewanella baltica OS185, complete genome" - assert row['filename'] == "podar-ref/47.fa" - assert row['license'] == "CC0" - assert row['sum_hashes'] == "5292" + assert row["signature_file"] == sig47 + assert row["md5"] == "09a08691ce52952152f0e866a59f6261" + assert row["ksize"] == "31" + assert row["moltype"] == "DNA" + assert row["num"] == "0" + assert row["scaled"] == "1000" + assert row["n_hashes"] == "5177" + assert row["seed"] == "42" + assert row["with_abundance"] == "1" + assert row["name"] == "NC_009665.1 Shewanella baltica OS185, complete genome" + assert row["filename"] == "podar-ref/47.fa" + assert row["license"] == "CC0" + assert row["sum_hashes"] == "5292" def test_sig_describe_2_csv_as_picklist(runtmp): @@ -3503,14 +3726,12 @@ def test_sig_describe_2_csv_as_picklist(runtmp): # pickfile c = runtmp - sig47 = utils.get_test_data('47.fa.sig') - outcsv = runtmp.output('out.csv') + sig47 = utils.get_test_data("47.fa.sig") + outcsv = runtmp.output("out.csv") - c.run_sourmash('sig', 'describe', sig47, - '--csv', outcsv) + c.run_sourmash("sig", "describe", sig47, "--csv", outcsv) - c.run_sourmash('sig', 'describe', sig47, - '--picklist', f'{outcsv}::manifest') + 
c.run_sourmash("sig", "describe", sig47, "--picklist", f"{outcsv}::manifest") out = c.last_result.out print(c.last_result) @@ -3531,10 +3752,9 @@ def test_sig_describe_2_include_db_pattern(runtmp): # test sig describe --include-db-pattern c = runtmp - allzip = utils.get_test_data('prot/all.zip') + allzip = utils.get_test_data("prot/all.zip") - c.run_sourmash('sig', 'describe', allzip, - '--include-db-pattern', 'os185') + c.run_sourmash("sig", "describe", allzip, "--include-db-pattern", "os185") out = c.last_result.out print(c.last_result) @@ -3555,10 +3775,11 @@ def test_sig_describe_2_exclude_db_pattern(runtmp): # test sig describe --exclude-db-pattern c = runtmp - allzip = utils.get_test_data('prot/all.zip') + allzip = utils.get_test_data("prot/all.zip") - c.run_sourmash('sig', 'describe', allzip, '--dna', '-k', '31', - '--exclude-db-pattern', 'os223') + c.run_sourmash( + "sig", "describe", allzip, "--dna", "-k", "31", "--exclude-db-pattern", "os223" + ) out = c.last_result.out print(c.last_result) @@ -3577,13 +3798,13 @@ def test_sig_describe_2_exclude_db_pattern(runtmp): def test_sig_describe_3_manifest_works(runtmp): # test on a manifest with relative paths, in proper location - mf = utils.get_test_data('scaled/mf.csv') - runtmp.sourmash('sig', 'describe', mf, '--csv', 'out.csv') + mf = utils.get_test_data("scaled/mf.csv") + runtmp.sourmash("sig", "describe", mf, "--csv", "out.csv") out = runtmp.last_result.out print(out) - with open(runtmp.output('out.csv'), newline='') as fp: + with open(runtmp.output("out.csv"), newline="") as fp: r = csv.reader(fp) rows = list(r) assert len(rows) == 16 # 15 signatures, plus head @@ -3593,41 +3814,41 @@ def test_sig_describe_3_manifest_fails_when_moved(runtmp): # test on a manifest with relative paths, when in wrong place; # should fail, because actual signatures cannot be loaded now. # note: this tests lazy loading. - mf = utils.get_test_data('scaled/mf.csv') - shutil.copyfile(mf, runtmp.output('mf.csv')) + mf = utils.get_test_data("scaled/mf.csv") + shutil.copyfile(mf, runtmp.output("mf.csv")) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'describe', 'mf.csv') + runtmp.sourmash("sig", "describe", "mf.csv") + - @utils.in_tempdir def test_sig_overlap(c): # get overlap details - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - c.run_sourmash('sig', 'overlap', sig47, sig63) + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + c.run_sourmash("sig", "overlap", sig47, sig63) out = c.last_result.out print(out) # md5s - assert '09a08691ce52952152f0e866a59f6261' in out - assert '38729c6374925585db28916b82a6f513' in out + assert "09a08691ce52952152f0e866a59f6261" in out + assert "38729c6374925585db28916b82a6f513" in out - assert 'similarity: 0.32069' in out - assert 'number of hashes in common: 2529' in out + assert "similarity: 0.32069" in out + assert "number of hashes in common: 2529" in out @utils.in_tempdir def test_import_export_1(c): # check to make sure we can import what we've exported! 
- inp = utils.get_test_data('genome-s11.fa.gz.sig') - outp = c.output('export.json') + inp = utils.get_test_data("genome-s11.fa.gz.sig") + outp = c.output("export.json") - c.run_sourmash('sig', 'export', inp, '-o', outp, '-k', '21', '--dna') - c.run_sourmash('sig', 'import', outp) + c.run_sourmash("sig", "export", inp, "-o", outp, "-k", "21", "--dna") + c.run_sourmash("sig", "import", outp) - original = sourmash.load_one_signature(inp, ksize=21, select_moltype='DNA') + original = sourmash.load_one_signature(inp, ksize=21, select_moltype="DNA") roundtrip = sourmash.load_one_signature(c.last_result.out) assert original.minhash == roundtrip.minhash @@ -3636,13 +3857,13 @@ def test_import_export_1(c): @utils.in_tempdir def test_import_export_1_by_md5(c): # check to make sure we can import what we've exported! - inp = utils.get_test_data('genome-s11.fa.gz.sig') - outp = c.output('export.json') + inp = utils.get_test_data("genome-s11.fa.gz.sig") + outp = c.output("export.json") - c.run_sourmash('sig', 'export', inp, '-o', outp, '--md5', '1437d8eae6') - c.run_sourmash('sig', 'import', outp) + c.run_sourmash("sig", "export", inp, "-o", outp, "--md5", "1437d8eae6") + c.run_sourmash("sig", "import", outp) - original = sourmash.load_one_signature(inp, ksize=21, select_moltype='DNA') + original = sourmash.load_one_signature(inp, ksize=21, select_moltype="DNA") roundtrip = sourmash.load_one_signature(c.last_result.out) assert original.minhash == roundtrip.minhash @@ -3655,271 +3876,259 @@ def test_import_export_2(c): # mash sketch -s 500 -k 21 ./tests/test-data/genome-s11.fa.gz # mash info -d ./tests/test-data/genome-s11.fa.gz.msh > tests/test-data/genome-s11.fa.gz.msh.json_dump # - sig1 = utils.get_test_data('genome-s11.fa.gz.sig') - msh_sig = utils.get_test_data('genome-s11.fa.gz.msh.json_dump') + sig1 = utils.get_test_data("genome-s11.fa.gz.sig") + msh_sig = utils.get_test_data("genome-s11.fa.gz.msh.json_dump") - c.run_sourmash('sig', 'import', msh_sig) + c.run_sourmash("sig", "import", msh_sig) imported = sourmash.load_one_signature(c.last_result.out) - compare = sourmash.load_one_signature(sig1, ksize=21, select_moltype='DNA') + compare = sourmash.load_one_signature(sig1, ksize=21, select_moltype="DNA") assert imported.minhash == compare.minhash def test_import_mash_csv_to_sig(runtmp): # test copied over from 'sourmash import_csv'. 
- testdata1 = utils.get_test_data('short.fa.msh.dump') - testdata2 = utils.get_test_data('short.fa') + testdata1 = utils.get_test_data("short.fa.msh.dump") + testdata2 = utils.get_test_data("short.fa") - runtmp.sourmash('sig', 'import', '--csv', testdata1, '-o', 'xxx.sig') + runtmp.sourmash("sig", "import", "--csv", testdata1, "-o", "xxx.sig") - runtmp.sourmash('sketch', 'dna', '-p', 'k=31,num=970', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=970", testdata2) - runtmp.sourmash('search', '-k', '31', 'short.fa.sig', 'xxx.sig') + runtmp.sourmash("search", "-k", "31", "short.fa.sig", "xxx.sig") print("RUNTEMP", runtmp) - assert '1 matches' in runtmp.last_result.out - assert '100.0% short.fa' in runtmp.last_result.out + assert "1 matches" in runtmp.last_result.out + assert "100.0% short.fa" in runtmp.last_result.out def test_sig_manifest_1_zipfile(runtmp): # make a manifest from a .zip file - protzip = utils.get_test_data('prot/protein.zip') - runtmp.sourmash('sig', 'manifest', protzip, '-o', 'SOURMASH-MANIFEST.csv') + protzip = utils.get_test_data("prot/protein.zip") + runtmp.sourmash("sig", "manifest", protzip, "-o", "SOURMASH-MANIFEST.csv") - manifest_fn = runtmp.output('SOURMASH-MANIFEST.csv') - with open(manifest_fn, newline='') as csvfp: + manifest_fn = runtmp.output("SOURMASH-MANIFEST.csv") + with open(manifest_fn, newline="") as csvfp: manifest = CollectionManifest.load_from_csv(csvfp) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list def test_sig_manifest_1_zipfile_csv_gz(runtmp): # make a gzipped manifest from a .zip file - protzip = utils.get_test_data('prot/protein.zip') - runtmp.sourmash('sig', 'manifest', protzip, - '-o', 'SOURMASH-MANIFEST.csv.gz') + protzip = utils.get_test_data("prot/protein.zip") + runtmp.sourmash("sig", "manifest", protzip, "-o", "SOURMASH-MANIFEST.csv.gz") - manifest_fn = runtmp.output('SOURMASH-MANIFEST.csv.gz') - with gzip.open(manifest_fn, "rt", newline='') as csvfp: + manifest_fn = runtmp.output("SOURMASH-MANIFEST.csv.gz") + with gzip.open(manifest_fn, "rt", newline="") as csvfp: manifest = CollectionManifest.load_from_csv(csvfp) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list def test_sig_manifest_1_zipfile_already_exists(runtmp): # make a manifest from a .zip file; fail if the output file already exists - protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") - mf_csv = runtmp.output('mf.csv') + mf_csv = runtmp.output("mf.csv") with open(mf_csv, "w") as fp: fp.write("hello, world") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'manifest', protzip, '-o', 'mf.csv') + runtmp.sourmash("sig", "manifest", protzip, "-o", "mf.csv") def test_sig_manifest_1_zipfile_already_exists_force(runtmp): # make a manifest from a .zip file - protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") - mf_csv = runtmp.output('mf.csv') + mf_csv = runtmp.output("mf.csv") with
open(mf_csv, "w") as fp: fp.write("hello, world") - runtmp.sourmash('sig', 'manifest', protzip, '-o', 'mf.csv', '-f') + runtmp.sourmash("sig", "manifest", protzip, "-o", "mf.csv", "-f") - with open(mf_csv, newline='') as csvfp: + with open(mf_csv, newline="") as csvfp: manifest = CollectionManifest.load_from_csv(csvfp) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list def test_sig_manifest_1_zipfile_already_exists_sql(runtmp): # make a manifest from a .zip file - protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") - mf_csv = runtmp.output('mf.mfsql') - runtmp.sourmash('sig', 'manifest', protzip, '-o', 'mf.mfsql', '-F', 'sql') - runtmp.sourmash('sig', 'manifest', protzip, '-o', 'mf.mfsql', '-F', 'sql', - '-f') + mf_csv = runtmp.output("mf.mfsql") + runtmp.sourmash("sig", "manifest", protzip, "-o", "mf.mfsql", "-F", "sql") + runtmp.sourmash("sig", "manifest", protzip, "-o", "mf.mfsql", "-F", "sql", "-f") manifest = CollectionManifest.load_from_filename(mf_csv) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list def test_sig_manifest_2_sigfile(runtmp): # make a manifest from a .sig file - sigfile = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') + sigfile = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) - runtmp.sourmash('sig', 'manifest', sigfile, '-o', 'SOURMASH-MANIFEST.csv') + runtmp.sourmash("sig", "manifest", sigfile, "-o", "SOURMASH-MANIFEST.csv") - status = runtmp.last_result.status - out = runtmp.last_result.out - err = runtmp.last_result.err - - manifest_fn = runtmp.output('SOURMASH-MANIFEST.csv') - with open(manifest_fn, newline='') as csvfp: + manifest_fn = runtmp.output("SOURMASH-MANIFEST.csv") + with open(manifest_fn, newline="") as csvfp: manifest = CollectionManifest.load_from_csv(csvfp) assert len(manifest) == 1 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list def test_sig_manifest_3_sbt(runtmp): # make a manifest from an SBT - protzip = utils.get_test_data('prot/protein.sbt.zip') - runtmp.sourmash('sig', 'manifest', protzip, '-o', 'SOURMASH-MANIFEST.csv') + protzip = utils.get_test_data("prot/protein.sbt.zip") + runtmp.sourmash("sig", "manifest", protzip, "-o", "SOURMASH-MANIFEST.csv") - manifest_fn = runtmp.output('SOURMASH-MANIFEST.csv') - with open(manifest_fn, newline='') as csvfp: + manifest_fn = runtmp.output("SOURMASH-MANIFEST.csv") + with open(manifest_fn, newline="") as csvfp: manifest = CollectionManifest.load_from_csv(csvfp) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert 
"16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list def test_sig_manifest_4_lca(runtmp): # make a manifest from a .lca.json file - sigfile = utils.get_test_data('prot/protein.lca.json.gz') - runtmp.sourmash('sig', 'manifest', sigfile, '-o', - 'SOURMASH-MANIFEST.csv') + sigfile = utils.get_test_data("prot/protein.lca.json.gz") + runtmp.sourmash("sig", "manifest", sigfile, "-o", "SOURMASH-MANIFEST.csv") - manifest_fn = runtmp.output('SOURMASH-MANIFEST.csv') - with open(manifest_fn, newline='') as csvfp: + manifest_fn = runtmp.output("SOURMASH-MANIFEST.csv") + with open(manifest_fn, newline="") as csvfp: manifest = CollectionManifest.load_from_csv(csvfp) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list def test_sig_manifest_5_dir(runtmp): # make a manifest from a directory - sigfile = utils.get_test_data('prot/protein/') - runtmp.sourmash('sig', 'manifest', sigfile, '-o', 'SOURMASH-MANIFEST.csv') - - status = runtmp.last_result.status - out = runtmp.last_result.out - err = runtmp.last_result.err + sigfile = utils.get_test_data("prot/protein/") + runtmp.sourmash("sig", "manifest", sigfile, "-o", "SOURMASH-MANIFEST.csv") - manifest_fn = runtmp.output('SOURMASH-MANIFEST.csv') - with open(manifest_fn, newline='') as csvfp: + manifest_fn = runtmp.output("SOURMASH-MANIFEST.csv") + with open(manifest_fn, newline="") as csvfp: manifest = CollectionManifest.load_from_csv(csvfp) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list def test_sig_manifest_6_pathlist(runtmp): # make a manifest from a pathlist file - sigfiles = utils.get_test_data('prot/protein/*.sig') + sigfiles = utils.get_test_data("prot/protein/*.sig") sigfiles = glob.glob(sigfiles) - pathlist = runtmp.output('pathlist.txt') - with open(pathlist, 'wt') as fp: + pathlist = runtmp.output("pathlist.txt") + with open(pathlist, "w") as fp: fp.write("\n".join(sigfiles)) - runtmp.sourmash('sig', 'manifest', pathlist, '-o', 'SOURMASH-MANIFEST.csv') + runtmp.sourmash("sig", "manifest", pathlist, "-o", "SOURMASH-MANIFEST.csv") - status = runtmp.last_result.status - out = runtmp.last_result.out - err = runtmp.last_result.err - - manifest_fn = runtmp.output('SOURMASH-MANIFEST.csv') - with open(manifest_fn, newline='') as csvfp: + manifest_fn = runtmp.output("SOURMASH-MANIFEST.csv") + with open(manifest_fn, newline="") as csvfp: manifest = CollectionManifest.load_from_csv(csvfp) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list # note: the manifest output for pathlists will contain the locations # used in the pathlist. This is required by StandaloneManifestIndex. 
for row in manifest.rows: - iloc = row['internal_location'] + iloc = row["internal_location"] print(iloc) - assert iloc.startswith('/'), iloc + assert iloc.startswith("/"), iloc def test_sig_manifest_does_not_exist(runtmp): with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('sig', 'manifest', 'does-not-exist', - '-o', 'out.csv') + runtmp.run_sourmash("sig", "manifest", "does-not-exist", "-o", "out.csv") - assert "Cannot open 'does-not-exist' as a sourmash signature collection." in runtmp.last_result.err + assert ( + "Cannot open 'does-not-exist' as a sourmash signature collection." + in runtmp.last_result.err + ) def test_sig_manifest_7_allzip_1(runtmp): # the rebuilt manifest w/o '-f' will miss dna-sig.noext - allzip = utils.get_test_data('prot/all.zip') - runtmp.sourmash('sig', 'manifest', allzip, '-o', 'xyz.csv') + allzip = utils.get_test_data("prot/all.zip") + runtmp.sourmash("sig", "manifest", allzip, "-o", "xyz.csv") - manifest_fn = runtmp.output('xyz.csv') - with open(manifest_fn, newline='') as csvfp: + manifest_fn = runtmp.output("xyz.csv") + with open(manifest_fn, newline="") as csvfp: manifest = CollectionManifest.load_from_csv(csvfp) assert len(manifest) == 7 - filenames = set( row['internal_location'] for row in manifest.rows ) - assert 'dna-sig.noext' not in filenames + filenames = set(row["internal_location"] for row in manifest.rows) + assert "dna-sig.noext" not in filenames def test_sig_manifest_7_allzip_2(runtmp): # the rebuilt manifest w/ '-f' will contain dna-sig.noext - allzip = utils.get_test_data('prot/all.zip') - runtmp.sourmash('sig', 'manifest', allzip, '-o', 'xyz.csv', '-f') + allzip = utils.get_test_data("prot/all.zip") + runtmp.sourmash("sig", "manifest", allzip, "-o", "xyz.csv", "-f") - manifest_fn = runtmp.output('xyz.csv') - with open(manifest_fn, newline='') as csvfp: + manifest_fn = runtmp.output("xyz.csv") + with open(manifest_fn, newline="") as csvfp: manifest = CollectionManifest.load_from_csv(csvfp) assert len(manifest) == 8 - filenames = set( row['internal_location'] for row in manifest.rows ) - assert 'dna-sig.noext' in filenames + filenames = set(row["internal_location"] for row in manifest.rows) + assert "dna-sig.noext" in filenames def test_sig_manifest_7_allzip_3(runtmp): # the existing manifest contains 'dna-sig.noext' whether or not -f is # used. - allzip = utils.get_test_data('prot/all.zip') - runtmp.sourmash('sig', 'manifest', allzip, '-o', 'xyz.csv', - '--no-rebuild') + allzip = utils.get_test_data("prot/all.zip") + runtmp.sourmash("sig", "manifest", allzip, "-o", "xyz.csv", "--no-rebuild") - manifest_fn = runtmp.output('xyz.csv') - with open(manifest_fn, newline='') as csvfp: + manifest_fn = runtmp.output("xyz.csv") + with open(manifest_fn, newline="") as csvfp: manifest = CollectionManifest.load_from_csv(csvfp) assert len(manifest) == 8 - filenames = set( row['internal_location'] for row in manifest.rows ) - assert 'dna-sig.noext' in filenames + filenames = set(row["internal_location"] for row in manifest.rows) + assert "dna-sig.noext" in filenames def test_sig_manifest_8_sqldb(runtmp): # make a sqldb and then run sig manifest on it.
- gcf_all = glob.glob(utils.get_test_data('gather/GCF*.sig')) - sqldb = runtmp.output('some.sqldb') + gcf_all = glob.glob(utils.get_test_data("gather/GCF*.sig")) + sqldb = runtmp.output("some.sqldb") - runtmp.sourmash('sig', 'cat', '-k', '31', *gcf_all, '-o', sqldb) + runtmp.sourmash("sig", "cat", "-k", "31", *gcf_all, "-o", sqldb) # need to use '--no-rebuild-manifest' with 'sig manifest' on sqldb, # because it has a manifest but not the _signatures_with_internal @@ -3927,11 +4136,10 @@ def test_sig_manifest_8_sqldb(runtmp): # so, this should fail... with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'manifest', sqldb, '-o', 'mf.csv') + runtmp.sourmash("sig", "manifest", sqldb, "-o", "mf.csv") # ...and this should succeed: - runtmp.sourmash('sig', 'manifest', sqldb, '-o', 'mf.csv', - '--no-rebuild') + runtmp.sourmash("sig", "manifest", sqldb, "-o", "mf.csv", "--no-rebuild") err = runtmp.last_result.err print(err) @@ -3939,23 +4147,22 @@ def test_sig_manifest_8_sqldb(runtmp): out = runtmp.last_result.out print(out) - assert 'manifest contains 12 signatures total.' in err + assert "manifest contains 12 signatures total." in err assert "wrote manifest to 'mf.csv'" in err - mf = CollectionManifest.load_from_filename(runtmp.output('mf.csv')) + mf = CollectionManifest.load_from_filename(runtmp.output("mf.csv")) assert len(mf) == 12 def test_sig_manifest_8_sqldb_out(runtmp): # make a zip and run manifest out on it to make a sql format manifest. - gcf_all = glob.glob(utils.get_test_data('gather/GCF*.sig')) - zipfile = runtmp.output('some.zip') + gcf_all = glob.glob(utils.get_test_data("gather/GCF*.sig")) + zipfile = runtmp.output("some.zip") - runtmp.sourmash('sig', 'cat', '-k', '31', *gcf_all, '-o', zipfile) + runtmp.sourmash("sig", "cat", "-k", "31", *gcf_all, "-o", zipfile) # ...and this should succeed: - runtmp.sourmash('sig', 'manifest', zipfile, '-o', 'mf.sqldb', - '-F', 'sql') + runtmp.sourmash("sig", "manifest", zipfile, "-o", "mf.sqldb", "-F", "sql") err = runtmp.last_result.err print(err) @@ -3963,38 +4170,46 @@ def test_sig_manifest_8_sqldb_out(runtmp): out = runtmp.last_result.out print(out) - assert 'manifest contains 12 signatures total.' in err + assert "manifest contains 12 signatures total." 
in err assert "wrote manifest to 'mf.sqldb'" in err - mf = CollectionManifest.load_from_filename(runtmp.output('mf.sqldb')) + mf = CollectionManifest.load_from_filename(runtmp.output("mf.sqldb")) assert len(mf) == 12 def test_sig_kmers_1_dna(runtmp): # test sig kmers on dna - seqfile = utils.get_test_data('short.fa') + seqfile = utils.get_test_data("short.fa") - runtmp.sourmash('sketch', 'dna', seqfile, '-p', 'scaled=1') - ss = sourmash.load_one_signature(runtmp.output('short.fa.sig')) + runtmp.sourmash("sketch", "dna", seqfile, "-p", "scaled=1") + ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) mh = ss.minhash - assert mh.moltype == 'DNA' - - runtmp.sourmash('sig', 'kmers', '--sig', 'short.fa.sig', - '--seq', seqfile, - '--save-kmers', 'short.csv', - '--save-sequences', 'matched.fa') + assert mh.moltype == "DNA" + + runtmp.sourmash( + "sig", + "kmers", + "--sig", + "short.fa.sig", + "--seq", + seqfile, + "--save-kmers", + "short.csv", + "--save-sequences", + "matched.fa", + ) out = runtmp.last_result.out print(out) err = runtmp.last_result.err print(err) - assert 'total hashes in merged signature: 970' in err - assert 'found 970 distinct matching hashes (100.0%)' in err + assert "total hashes in merged signature: 970" in err + assert "found 970 distinct matching hashes (100.0%)" in err # check FASTA output - assert os.path.exists(runtmp.output('matched.fa')) - with screed.open(runtmp.output('matched.fa')) as f: + assert os.path.exists(runtmp.output("matched.fa")) + with screed.open(runtmp.output("matched.fa")) as f: records = list(f) assert len(records) == 1 assert len(records[0].sequence) == 1000, len(records[0].sequence) @@ -4005,8 +4220,8 @@ def test_sig_kmers_1_dna(runtmp): assert seq_mh.similarity(mh) == 1.0 # check CSV output w/k-mers and hashes etc - assert os.path.exists(runtmp.output('short.csv')) - with open(runtmp.output('short.csv'), newline='') as fp: + assert os.path.exists(runtmp.output("short.csv")) + with open(runtmp.output("short.csv"), newline="") as fp: r = csv.DictReader(fp) rows = list(r) assert len(rows) == 970 @@ -4014,58 +4229,56 @@ def test_sig_kmers_1_dna(runtmp): check_mh = mh.copy_and_clear() check_mh2 = mh.copy_and_clear() for row in rows: - check_mh.add_sequence(row['kmer']) - check_mh2.add_hash(int(row['hashval'])) + check_mh.add_sequence(row["kmer"]) + check_mh2.add_hash(int(row["hashval"])) assert check_mh.similarity(mh) == 1.0 assert check_mh2.similarity(mh) == 1.0 def test_sig_kmers_1_dna_more_in_query(runtmp): # test sig kmers on dna, where query has more than matches - seqfile = utils.get_test_data('short.fa') + seqfile = utils.get_test_data("short.fa") - runtmp.sourmash('sketch', 'dna', seqfile, '-p', 'scaled=1') - ss = sourmash.load_one_signature(runtmp.output('short.fa.sig')) + runtmp.sourmash("sketch", "dna", seqfile, "-p", "scaled=1") + ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) mh = ss.minhash - assert mh.moltype == 'DNA' + assert mh.moltype == "DNA" # make a new sequence for query, with more k-mers - query_seqfile = runtmp.output('query.fa') - with open(query_seqfile, 'wt') as fp: + query_seqfile = runtmp.output("query.fa") + with open(query_seqfile, "w") as fp: with screed.open(seqfile) as screed_iter: for record in screed_iter: fp.write(f">{record.name}\n{record.sequence}AGTTACGATC\n") - runtmp.sourmash('sig', 'kmers', '--sig', 'short.fa.sig', - '--seq', query_seqfile) + runtmp.sourmash("sig", "kmers", "--sig", "short.fa.sig", "--seq", query_seqfile) out = runtmp.last_result.out print(out) err = 
runtmp.last_result.err print(err) - assert 'total hashes in merged signature: 970' in err + assert "total hashes in merged signature: 970" in err # should only find 970 overlapping hashes here -- - assert 'found 970 distinct matching hashes (100.0%)' in err + assert "found 970 distinct matching hashes (100.0%)" in err def test_sig_kmers_1_dna_empty_seq(runtmp): # test sig kmers with empty query seq - seqfile = utils.get_test_data('short.fa') + seqfile = utils.get_test_data("short.fa") - runtmp.sourmash('sketch', 'dna', seqfile, '-p', 'scaled=1') - ss = sourmash.load_one_signature(runtmp.output('short.fa.sig')) + runtmp.sourmash("sketch", "dna", seqfile, "-p", "scaled=1") + ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) mh = ss.minhash - assert mh.moltype == 'DNA' + assert mh.moltype == "DNA" # make an empty query sequence file - query_seqfile = runtmp.output('query.fa') - with open(query_seqfile, 'wt') as fp: + query_seqfile = runtmp.output("query.fa") + with open(query_seqfile, "w"): pass with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'kmers', '--sig', 'short.fa.sig', - '--seq', query_seqfile) + runtmp.sourmash("sig", "kmers", "--sig", "short.fa.sig", "--seq", query_seqfile) out = runtmp.last_result.out print(out) @@ -4077,16 +4290,15 @@ def test_sig_kmers_1_dna_empty_seq(runtmp): def test_sig_kmers_1_dna_empty_sig(runtmp): # test sig kmers with empty query sig - seqfile = utils.get_test_data('short.fa') + seqfile = utils.get_test_data("short.fa") mh = sourmash.MinHash(ksize=31, n=0, scaled=1) ss = sourmash.SourmashSignature(mh, name="empty") - with open(runtmp.output('empty.sig'), 'wt') as fp: + with open(runtmp.output("empty.sig"), "w") as fp: sourmash.save_signatures([ss], fp) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'kmers', '--sig', 'empty.sig', - '--seq', seqfile) + runtmp.sourmash("sig", "kmers", "--sig", "empty.sig", "--seq", seqfile) out = runtmp.last_result.out print(out) @@ -4098,51 +4310,58 @@ def test_sig_kmers_1_dna_empty_sig(runtmp): def test_sig_kmers_1_dna_single_sig(runtmp): # test sig kmers with a fabricated query sig with a single hash - seqfile = utils.get_test_data('short.fa') + seqfile = utils.get_test_data("short.fa") mh = sourmash.MinHash(ksize=31, n=0, scaled=1) mh.add_hash(1070961951490202715) ss = sourmash.SourmashSignature(mh, name="small") - with open(runtmp.output('small.sig'), 'wt') as fp: + with open(runtmp.output("small.sig"), "w") as fp: sourmash.save_signatures([ss], fp) - runtmp.sourmash('sig', 'kmers', '--sig', 'small.sig', - '--seq', seqfile) + runtmp.sourmash("sig", "kmers", "--sig", "small.sig", "--seq", seqfile) out = runtmp.last_result.out print(out) err = runtmp.last_result.err print(err) - assert 'total hashes in merged signature: 1' in err - assert 'found 1 distinct matching hashes (100.0%)' in err + assert "total hashes in merged signature: 1" in err + assert "found 1 distinct matching hashes (100.0%)" in err def test_sig_kmers_1_dna_lowscaled(runtmp): # test sig kmers on dna with a scaled of 100, so not all k-mers - seqfile = utils.get_test_data('short.fa') + seqfile = utils.get_test_data("short.fa") - runtmp.sourmash('sketch', 'dna', seqfile, '-p', 'scaled=100') - ss = sourmash.load_one_signature(runtmp.output('short.fa.sig')) + runtmp.sourmash("sketch", "dna", seqfile, "-p", "scaled=100") + ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) mh = ss.minhash - assert mh.moltype == 'DNA' - - runtmp.sourmash('sig', 'kmers', '--sig', 'short.fa.sig', -
'--seq', seqfile, - '--save-kmers', 'short.csv', - '--save-sequences', 'matched.fa') + assert mh.moltype == "DNA" + + runtmp.sourmash( + "sig", + "kmers", + "--sig", + "short.fa.sig", + "--seq", + seqfile, + "--save-kmers", + "short.csv", + "--save-sequences", + "matched.fa", + ) out = runtmp.last_result.out print(out) err = runtmp.last_result.err print(err) - assert 'total hashes in merged signature: 5' in err - assert 'found 5 distinct matching hashes (100.0%)' in err + assert "total hashes in merged signature: 5" in err + assert "found 5 distinct matching hashes (100.0%)" in err # check FASTA output - assert os.path.exists(runtmp.output('matched.fa')) - with screed.open(runtmp.output('matched.fa')) as f: + assert os.path.exists(runtmp.output("matched.fa")) + with screed.open(runtmp.output("matched.fa")) as f: records = list(f) assert len(records) == 1 assert len(records[0].sequence) == 1000, len(records[0].sequence) @@ -4153,8 +4372,8 @@ def test_sig_kmers_1_dna_lowscaled(runtmp): assert seq_mh.similarity(mh) == 1.0 # check CSV output w/k-mers and hashes etc - assert os.path.exists(runtmp.output('short.csv')) - with open(runtmp.output('short.csv'), newline='') as fp: + assert os.path.exists(runtmp.output("short.csv")) + with open(runtmp.output("short.csv"), newline="") as fp: r = csv.DictReader(fp) rows = list(r) assert len(rows) == 5 @@ -4162,37 +4381,45 @@ def test_sig_kmers_1_dna_lowscaled(runtmp): check_mh = mh.copy_and_clear() check_mh2 = mh.copy_and_clear() for row in rows: - check_mh.add_sequence(row['kmer']) - check_mh2.add_hash(int(row['hashval'])) + check_mh.add_sequence(row["kmer"]) + check_mh2.add_hash(int(row["hashval"])) assert check_mh.similarity(mh) == 1.0 assert check_mh2.similarity(mh) == 1.0 def test_sig_kmers_1_dna_num(runtmp): # test sig kmers on dna with num=50, so not all k-mers - seqfile = utils.get_test_data('short.fa') + seqfile = utils.get_test_data("short.fa") - runtmp.sourmash('sketch', 'dna', seqfile, '-p', 'num=50') - ss = sourmash.load_one_signature(runtmp.output('short.fa.sig')) + runtmp.sourmash("sketch", "dna", seqfile, "-p", "num=50") + ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) mh = ss.minhash - assert mh.moltype == 'DNA' - - runtmp.sourmash('sig', 'kmers', '--sig', 'short.fa.sig', - '--seq', seqfile, - '--save-kmers', 'short.csv', - '--save-sequences', 'matched.fa') + assert mh.moltype == "DNA" + + runtmp.sourmash( + "sig", + "kmers", + "--sig", + "short.fa.sig", + "--seq", + seqfile, + "--save-kmers", + "short.csv", + "--save-sequences", + "matched.fa", + ) out = runtmp.last_result.out print(out) err = runtmp.last_result.err print(err) - assert 'total hashes in merged signature: 50' in err - assert 'found 50 distinct matching hashes (100.0%)' in err + assert "total hashes in merged signature: 50" in err + assert "found 50 distinct matching hashes (100.0%)" in err # check FASTA output - assert os.path.exists(runtmp.output('matched.fa')) - with screed.open(runtmp.output('matched.fa')) as f: + assert os.path.exists(runtmp.output("matched.fa")) + with screed.open(runtmp.output("matched.fa")) as f: records = list(f) assert len(records) == 1 assert len(records[0].sequence) == 1000, len(records[0].sequence) @@ -4203,8 +4430,8 @@ def test_sig_kmers_1_dna_num(runtmp): assert seq_mh.similarity(mh) == 1.0 # check CSV output w/k-mers and hashes etc - assert os.path.exists(runtmp.output('short.csv')) - with open(runtmp.output('short.csv'), newline='') as fp: + assert os.path.exists(runtmp.output("short.csv")) + with
open(runtmp.output("short.csv"), newline="") as fp: r = csv.DictReader(fp) rows = list(r) assert len(rows) == 50 @@ -4212,37 +4439,46 @@ def test_sig_kmers_1_dna_num(runtmp): check_mh = mh.copy_and_clear() check_mh2 = mh.copy_and_clear() for row in rows: - check_mh.add_sequence(row['kmer']) - check_mh2.add_hash(int(row['hashval'])) + check_mh.add_sequence(row["kmer"]) + check_mh2.add_hash(int(row["hashval"])) assert check_mh.similarity(mh) == 1.0 assert check_mh2.similarity(mh) == 1.0 def test_sig_kmers_1_dna_translate_protein(runtmp): # test sig kmers on dna - seqfile = utils.get_test_data('short.fa') + seqfile = utils.get_test_data("short.fa") - runtmp.sourmash('sketch', 'translate', seqfile, '-p', 'scaled=1') - ss = sourmash.load_one_signature(runtmp.output('short.fa.sig')) + runtmp.sourmash("sketch", "translate", seqfile, "-p", "scaled=1") + ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) mh = ss.minhash - assert mh.moltype == 'protein' - - runtmp.sourmash('sig', 'kmers', '--sig', 'short.fa.sig', - '--seq', seqfile, - '--save-kmers', 'short.csv', - '--save-sequences', 'matched.fa', '--translate') + assert mh.moltype == "protein" + + runtmp.sourmash( + "sig", + "kmers", + "--sig", + "short.fa.sig", + "--seq", + seqfile, + "--save-kmers", + "short.csv", + "--save-sequences", + "matched.fa", + "--translate", + ) out = runtmp.last_result.out print(out) err = runtmp.last_result.err print(err) - assert 'total hashes in merged signature: 1942' in err - assert 'found 1942 distinct matching hashes (100.0%)' in err + assert "total hashes in merged signature: 1942" in err + assert "found 1942 distinct matching hashes (100.0%)" in err # check FASTA output - assert os.path.exists(runtmp.output('matched.fa')) - with screed.open(runtmp.output('matched.fa')) as f: + assert os.path.exists(runtmp.output("matched.fa")) + with screed.open(runtmp.output("matched.fa")) as f: records = list(f) assert len(records) == 1 assert len(records[0].sequence) == 1000, len(records[0].sequence) @@ -4253,8 +4489,8 @@ def test_sig_kmers_1_dna_translate_protein(runtmp): assert seq_mh.similarity(mh) == 1.0 # check CSV output w/k-mers and hashes etc - assert os.path.exists(runtmp.output('short.csv')) - with open(runtmp.output('short.csv'), newline='') as fp: + assert os.path.exists(runtmp.output("short.csv")) + with open(runtmp.output("short.csv"), newline="") as fp: r = csv.DictReader(fp) rows = list(r) assert len(rows) == 1942 @@ -4262,37 +4498,46 @@ def test_sig_kmers_1_dna_translate_protein(runtmp): check_mh = mh.copy_and_clear() check_mh2 = mh.copy_and_clear() for row in rows: - check_mh.add_sequence(row['kmer']) - check_mh2.add_hash(int(row['hashval'])) + check_mh.add_sequence(row["kmer"]) + check_mh2.add_hash(int(row["hashval"])) assert check_mh.similarity(mh) == 1.0 assert check_mh2.similarity(mh) == 1.0 def test_sig_kmers_1_dna_translate_dayhoff(runtmp): # test sig kmers on dna - seqfile = utils.get_test_data('short.fa') + seqfile = utils.get_test_data("short.fa") - runtmp.sourmash('sketch', 'translate', seqfile, '-p', 'scaled=1,dayhoff') - ss = sourmash.load_one_signature(runtmp.output('short.fa.sig')) + runtmp.sourmash("sketch", "translate", seqfile, "-p", "scaled=1,dayhoff") + ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) mh = ss.minhash - assert mh.moltype == 'dayhoff' - - runtmp.sourmash('sig', 'kmers', '--sig', 'short.fa.sig', - '--seq', seqfile, - '--save-kmers', 'short.csv', - '--save-sequences', 'matched.fa', '--translate') + assert mh.moltype == "dayhoff" + + 
runtmp.sourmash( + "sig", + "kmers", + "--sig", + "short.fa.sig", + "--seq", + seqfile, + "--save-kmers", + "short.csv", + "--save-sequences", + "matched.fa", + "--translate", + ) out = runtmp.last_result.out print(out) err = runtmp.last_result.err print(err) - assert 'total hashes in merged signature: 1906' in err - assert 'found 1906 distinct matching hashes (100.0%)' in err + assert "total hashes in merged signature: 1906" in err + assert "found 1906 distinct matching hashes (100.0%)" in err # check FASTA output - assert os.path.exists(runtmp.output('matched.fa')) - with screed.open(runtmp.output('matched.fa')) as f: + assert os.path.exists(runtmp.output("matched.fa")) + with screed.open(runtmp.output("matched.fa")) as f: records = list(f) assert len(records) == 1 assert len(records[0].sequence) == 1000, len(records[0].sequence) @@ -4303,8 +4548,8 @@ def test_sig_kmers_1_dna_translate_dayhoff(runtmp): assert seq_mh.similarity(mh) == 1.0 # check CSV output w/k-mers and hashes etc - assert os.path.exists(runtmp.output('short.csv')) - with open(runtmp.output('short.csv'), newline='') as fp: + assert os.path.exists(runtmp.output("short.csv")) + with open(runtmp.output("short.csv"), newline="") as fp: r = csv.DictReader(fp) rows = list(r) assert len(rows) == 1906 @@ -4312,37 +4557,46 @@ def test_sig_kmers_1_dna_translate_dayhoff(runtmp): check_mh = mh.copy_and_clear() check_mh2 = mh.copy_and_clear() for row in rows: - check_mh.add_sequence(row['kmer']) - check_mh2.add_hash(int(row['hashval'])) + check_mh.add_sequence(row["kmer"]) + check_mh2.add_hash(int(row["hashval"])) assert check_mh.similarity(mh) == 1.0 assert check_mh2.similarity(mh) == 1.0 def test_sig_kmers_1_dna_translate_hp(runtmp): # test sig kmers on dna - seqfile = utils.get_test_data('short.fa') + seqfile = utils.get_test_data("short.fa") - runtmp.sourmash('sketch', 'translate', seqfile, '-p', 'scaled=1,hp') - ss = sourmash.load_one_signature(runtmp.output('short.fa.sig')) + runtmp.sourmash("sketch", "translate", seqfile, "-p", "scaled=1,hp") + ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) mh = ss.minhash - assert mh.moltype == 'hp' - - runtmp.sourmash('sig', 'kmers', '--sig', 'short.fa.sig', - '--seq', seqfile, - '--save-kmers', 'short.csv', - '--save-sequences', 'matched.fa', '--translate') + assert mh.moltype == "hp" + + runtmp.sourmash( + "sig", + "kmers", + "--sig", + "short.fa.sig", + "--seq", + seqfile, + "--save-kmers", + "short.csv", + "--save-sequences", + "matched.fa", + "--translate", + ) out = runtmp.last_result.out print(out) err = runtmp.last_result.err print(err) - assert 'total hashes in merged signature: 1750' in err - assert 'found 1750 distinct matching hashes (100.0%)' in err + assert "total hashes in merged signature: 1750" in err + assert "found 1750 distinct matching hashes (100.0%)" in err # check FASTA output - assert os.path.exists(runtmp.output('matched.fa')) - with screed.open(runtmp.output('matched.fa')) as f: + assert os.path.exists(runtmp.output("matched.fa")) + with screed.open(runtmp.output("matched.fa")) as f: records = list(f) assert len(records) == 1 assert len(records[0].sequence) == 1000, len(records[0].sequence) @@ -4353,8 +4607,8 @@ def test_sig_kmers_1_dna_translate_hp(runtmp): assert seq_mh.similarity(mh) == 1.0 # check CSV output w/k-mers and hashes etc - assert os.path.exists(runtmp.output('short.csv')) - with open(runtmp.output('short.csv'), newline='') as fp: + assert os.path.exists(runtmp.output("short.csv")) + with open(runtmp.output("short.csv"), newline="") as 
fp: r = csv.DictReader(fp) rows = list(r) assert len(rows) == 1750 @@ -4362,37 +4616,45 @@ def test_sig_kmers_1_dna_translate_hp(runtmp): check_mh = mh.copy_and_clear() check_mh2 = mh.copy_and_clear() for row in rows: - check_mh.add_sequence(row['kmer']) - check_mh2.add_hash(int(row['hashval'])) + check_mh.add_sequence(row["kmer"]) + check_mh2.add_hash(int(row["hashval"])) assert check_mh.similarity(mh) == 1.0 assert check_mh2.similarity(mh) == 1.0 def test_sig_kmers_2_protein(runtmp): # test out sig kmers on an faa file - seqfile = utils.get_test_data('ecoli.faa') + seqfile = utils.get_test_data("ecoli.faa") - runtmp.sourmash('sketch', 'protein', seqfile, '-p', 'scaled=1') - ss = sourmash.load_one_signature(runtmp.output('ecoli.faa.sig')) + runtmp.sourmash("sketch", "protein", seqfile, "-p", "scaled=1") + ss = sourmash.load_one_signature(runtmp.output("ecoli.faa.sig")) mh = ss.minhash - assert mh.moltype == 'protein' - - runtmp.sourmash('sig', 'kmers', '--sig', 'ecoli.faa.sig', - '--seq', seqfile, - '--save-kmers', 'ecoli.csv', - '--save-sequences', 'matched.fa') + assert mh.moltype == "protein" + + runtmp.sourmash( + "sig", + "kmers", + "--sig", + "ecoli.faa.sig", + "--seq", + seqfile, + "--save-kmers", + "ecoli.csv", + "--save-sequences", + "matched.fa", + ) out = runtmp.last_result.out print(out) err = runtmp.last_result.err print(err) - assert 'total hashes in merged signature: 1112' in err - assert 'found 1112 distinct matching hashes (100.0%)' in err + assert "total hashes in merged signature: 1112" in err + assert "found 1112 distinct matching hashes (100.0%)" in err # check FASTA output - assert os.path.exists(runtmp.output('matched.fa')) - with screed.open(runtmp.output('matched.fa')) as f: + assert os.path.exists(runtmp.output("matched.fa")) + with screed.open(runtmp.output("matched.fa")) as f: records = list(f) assert len(records) == 2 assert len(records[0].sequence) == 820, len(records[0].sequence) @@ -4404,8 +4666,8 @@ def test_sig_kmers_2_protein(runtmp): assert seq_mh.similarity(mh) == 1.0 # check CSV output w/k-mers and hashes etc - assert os.path.exists(runtmp.output('ecoli.csv')) - with open(runtmp.output('ecoli.csv'), newline='') as fp: + assert os.path.exists(runtmp.output("ecoli.csv")) + with open(runtmp.output("ecoli.csv"), newline="") as fp: r = csv.DictReader(fp) rows = list(r) assert len(rows) == 1112 @@ -4413,37 +4675,45 @@ def test_sig_kmers_2_protein(runtmp): check_mh = mh.copy_and_clear() check_mh2 = mh.copy_and_clear() for row in rows: - check_mh.add_protein(row['kmer']) - check_mh2.add_hash(int(row['hashval'])) + check_mh.add_protein(row["kmer"]) + check_mh2.add_hash(int(row["hashval"])) assert check_mh.similarity(mh) == 1.0 assert check_mh2.similarity(mh) == 1.0 def test_sig_kmers_2_dayhoff(runtmp): # test out sig kmers on an faa file - seqfile = utils.get_test_data('ecoli.faa') + seqfile = utils.get_test_data("ecoli.faa") - runtmp.sourmash('sketch', 'protein', seqfile, '-p', 'scaled=1,dayhoff') - ss = sourmash.load_one_signature(runtmp.output('ecoli.faa.sig')) + runtmp.sourmash("sketch", "protein", seqfile, "-p", "scaled=1,dayhoff") + ss = sourmash.load_one_signature(runtmp.output("ecoli.faa.sig")) mh = ss.minhash - assert mh.moltype == 'dayhoff' - - runtmp.sourmash('sig', 'kmers', '--sig', 'ecoli.faa.sig', - '--seq', seqfile, - '--save-kmers', 'ecoli.csv', - '--save-sequences', 'matched.fa') + assert mh.moltype == "dayhoff" + + runtmp.sourmash( + "sig", + "kmers", + "--sig", + "ecoli.faa.sig", + "--seq", + seqfile, + "--save-kmers", + "ecoli.csv", + 
"--save-sequences", + "matched.fa", + ) out = runtmp.last_result.out print(out) err = runtmp.last_result.err print(err) - assert 'total hashes in merged signature: 1100' in err - assert 'found 1100 distinct matching hashes (100.0%)' in err + assert "total hashes in merged signature: 1100" in err + assert "found 1100 distinct matching hashes (100.0%)" in err # check FASTA output - assert os.path.exists(runtmp.output('matched.fa')) - with screed.open(runtmp.output('matched.fa')) as f: + assert os.path.exists(runtmp.output("matched.fa")) + with screed.open(runtmp.output("matched.fa")) as f: records = list(f) assert len(records) == 2 assert len(records[0].sequence) == 820, len(records[0].sequence) @@ -4455,8 +4725,8 @@ def test_sig_kmers_2_dayhoff(runtmp): assert seq_mh.similarity(mh) == 1.0 # check CSV output w/k-mers and hashes etc - assert os.path.exists(runtmp.output('ecoli.csv')) - with open(runtmp.output('ecoli.csv'), newline='') as fp: + assert os.path.exists(runtmp.output("ecoli.csv")) + with open(runtmp.output("ecoli.csv"), newline="") as fp: r = csv.DictReader(fp) rows = list(r) assert len(rows) == 1100 @@ -4464,37 +4734,45 @@ def test_sig_kmers_2_dayhoff(runtmp): check_mh = mh.copy_and_clear() check_mh2 = mh.copy_and_clear() for row in rows: - check_mh.add_protein(row['kmer']) - check_mh2.add_hash(int(row['hashval'])) + check_mh.add_protein(row["kmer"]) + check_mh2.add_hash(int(row["hashval"])) assert check_mh.similarity(mh) == 1.0 assert check_mh2.similarity(mh) == 1.0 def test_sig_kmers_2_hp(runtmp): # test out sig kmers on an faa file - seqfile = utils.get_test_data('ecoli.faa') + seqfile = utils.get_test_data("ecoli.faa") - runtmp.sourmash('sketch', 'protein', seqfile, '-p', 'scaled=1,hp') - ss = sourmash.load_one_signature(runtmp.output('ecoli.faa.sig')) + runtmp.sourmash("sketch", "protein", seqfile, "-p", "scaled=1,hp") + ss = sourmash.load_one_signature(runtmp.output("ecoli.faa.sig")) mh = ss.minhash - assert mh.moltype == 'hp' - - runtmp.sourmash('sig', 'kmers', '--sig', 'ecoli.faa.sig', - '--seq', seqfile, - '--save-kmers', 'ecoli.csv', - '--save-sequences', 'matched.fa') + assert mh.moltype == "hp" + + runtmp.sourmash( + "sig", + "kmers", + "--sig", + "ecoli.faa.sig", + "--seq", + seqfile, + "--save-kmers", + "ecoli.csv", + "--save-sequences", + "matched.fa", + ) out = runtmp.last_result.out print(out) err = runtmp.last_result.err print(err) - assert 'total hashes in merged signature: 1048' in err - assert 'found 1048 distinct matching hashes (100.0%)' in err + assert "total hashes in merged signature: 1048" in err + assert "found 1048 distinct matching hashes (100.0%)" in err # check FASTA output - assert os.path.exists(runtmp.output('matched.fa')) - with screed.open(runtmp.output('matched.fa')) as f: + assert os.path.exists(runtmp.output("matched.fa")) + with screed.open(runtmp.output("matched.fa")) as f: records = list(f) assert len(records) == 2 assert len(records[0].sequence) == 820, len(records[0].sequence) @@ -4506,8 +4784,8 @@ def test_sig_kmers_2_hp(runtmp): assert seq_mh.similarity(mh) == 1.0 # check CSV output w/k-mers and hashes etc - assert os.path.exists(runtmp.output('ecoli.csv')) - with open(runtmp.output('ecoli.csv'), newline='') as fp: + assert os.path.exists(runtmp.output("ecoli.csv")) + with open(runtmp.output("ecoli.csv"), newline="") as fp: r = csv.DictReader(fp) rows = list(r) assert len(rows) == 1048 @@ -4515,33 +4793,33 @@ def test_sig_kmers_2_hp(runtmp): check_mh = mh.copy_and_clear() check_mh2 = mh.copy_and_clear() for row in rows: - 
check_mh.add_protein(row['kmer']) - check_mh2.add_hash(int(row['hashval'])) + check_mh.add_protein(row["kmer"]) + check_mh2.add_hash(int(row["hashval"])) assert check_mh.similarity(mh) == 1.0 assert check_mh2.similarity(mh) == 1.0 def test_sig_check_1(runtmp): # basic check functionality - sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/salmonella-picklist.csv') + sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/salmonella-picklist.csv") - runtmp.sourmash('sig', 'check', *sigfiles, - "--picklist", f"{picklist}::manifest", - "-m", "mf.csv") + runtmp.sourmash( + "sig", "check", *sigfiles, "--picklist", f"{picklist}::manifest", "-m", "mf.csv" + ) - out_mf = runtmp.output('mf.csv') + out_mf = runtmp.output("mf.csv") assert os.path.exists(out_mf) # all should match. - with open(out_mf, newline='') as fp: + with open(out_mf, newline="") as fp: mf = CollectionManifest.load_from_csv(fp) assert len(mf) == 24 idx = sourmash.load_file_as_index(out_mf) siglist = list(idx.signatures()) assert len(siglist) == 24 - ksizes = set([ ss.minhash.ksize for ss in siglist ]) + ksizes = set([ss.minhash.ksize for ss in siglist]) assert len(ksizes) == 3 assert 11 in ksizes assert 21 in ksizes @@ -4550,25 +4828,31 @@ def test_sig_check_1(runtmp): def test_sig_check_1_mf_csv_gz(runtmp): # basic check functionality, with gzipped manifest output - sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/salmonella-picklist.csv') - - runtmp.sourmash('sig', 'check', *sigfiles, - "--picklist", f"{picklist}::manifest", - "-m", "mf.csv.gz") - - out_mf = runtmp.output('mf.csv.gz') + sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/salmonella-picklist.csv") + + runtmp.sourmash( + "sig", + "check", + *sigfiles, + "--picklist", + f"{picklist}::manifest", + "-m", + "mf.csv.gz", + ) + + out_mf = runtmp.output("mf.csv.gz") assert os.path.exists(out_mf) # all should match. - with gzip.open(out_mf, "rt", newline='') as fp: + with gzip.open(out_mf, "rt", newline="") as fp: mf = CollectionManifest.load_from_csv(fp) assert len(mf) == 24 idx = sourmash.load_file_as_index(out_mf) siglist = list(idx.signatures()) assert len(siglist) == 24 - ksizes = set([ ss.minhash.ksize for ss in siglist ]) + ksizes = set([ss.minhash.ksize for ss in siglist]) assert len(ksizes) == 3 assert 11 in ksizes assert 21 in ksizes @@ -4577,30 +4861,36 @@ def test_sig_check_1_mf_csv_gz(runtmp): def test_sig_check_1_gz(runtmp): # basic check functionality with gzipped picklist - sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/salmonella-picklist.csv') - picklist_gz = runtmp.output('salmonella.csv.gz') + sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/salmonella-picklist.csv") + picklist_gz = runtmp.output("salmonella.csv.gz") with gzip.open(picklist_gz, "w") as outfp: with open(picklist, "rb") as infp: outfp.write(infp.read()) - runtmp.sourmash('sig', 'check', *sigfiles, - "--picklist", "salmonella.csv.gz::manifest", - "-m", "mf.csv") - - out_mf = runtmp.output('mf.csv') + runtmp.sourmash( + "sig", + "check", + *sigfiles, + "--picklist", + "salmonella.csv.gz::manifest", + "-m", + "mf.csv", + ) + + out_mf = runtmp.output("mf.csv") assert os.path.exists(out_mf) # all should match. 
- with open(out_mf, newline='') as fp: + with open(out_mf, newline="") as fp: mf = CollectionManifest.load_from_csv(fp) assert len(mf) == 24 idx = sourmash.load_file_as_index(out_mf) siglist = list(idx.signatures()) assert len(siglist) == 24 - ksizes = set([ ss.minhash.ksize for ss in siglist ]) + ksizes = set([ss.minhash.ksize for ss in siglist]) assert len(ksizes) == 3 assert 11 in ksizes assert 21 in ksizes @@ -4609,25 +4899,32 @@ def test_sig_check_1_gz(runtmp): def test_sig_check_1_nofail(runtmp): # basic check functionality with --fail-if-missing - sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/salmonella-picklist.csv') - - runtmp.sourmash('sig', 'check', *sigfiles, - "--picklist", f"{picklist}::manifest", - "-m", "mf.csv", '--fail-if-missing') - - out_mf = runtmp.output('mf.csv') + sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/salmonella-picklist.csv") + + runtmp.sourmash( + "sig", + "check", + *sigfiles, + "--picklist", + f"{picklist}::manifest", + "-m", + "mf.csv", + "--fail-if-missing", + ) + + out_mf = runtmp.output("mf.csv") assert os.path.exists(out_mf) # all should match. - with open(out_mf, newline='') as fp: + with open(out_mf, newline="") as fp: mf = CollectionManifest.load_from_csv(fp) assert len(mf) == 24 idx = sourmash.load_file_as_index(out_mf) siglist = list(idx.signatures()) assert len(siglist) == 24 - ksizes = set([ ss.minhash.ksize for ss in siglist ]) + ksizes = set([ss.minhash.ksize for ss in siglist]) assert len(ksizes) == 3 assert 11 in ksizes assert 21 in ksizes @@ -4636,44 +4933,54 @@ def test_sig_check_1_nofail(runtmp): def test_sig_check_1_no_picklist(runtmp): # basic check functionality - sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/salmonella-picklist.csv') + sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) + utils.get_test_data("gather/salmonella-picklist.csv") with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('sig', 'check', *sigfiles) + runtmp.sourmash("sig", "check", *sigfiles) assert "No picklist provided?! Exiting." in str(exc) -@pytest.mark.parametrize("column, coltype", - (('md5', 'md5'), - ('md5', 'md5prefix8'), - ('name', 'name'), - ('name', 'ident'), - ('name', 'identprefix'), - )) +@pytest.mark.parametrize( + "column, coltype", + ( + ("md5", "md5"), + ("md5", "md5prefix8"), + ("name", "name"), + ("name", "ident"), + ("name", "identprefix"), + ), +) def test_sig_check_1_column(runtmp, column, coltype): # basic check functionality for various columns/coltypes - sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/salmonella-picklist.csv') - - runtmp.sourmash('sig', 'check', *sigfiles, - "--picklist", f"{picklist}:{column}:{coltype}", - "-m", "mf.csv", - "-o", "missing.csv") - - out_mf = runtmp.output('mf.csv') + sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/salmonella-picklist.csv") + + runtmp.sourmash( + "sig", + "check", + *sigfiles, + "--picklist", + f"{picklist}:{column}:{coltype}", + "-m", + "mf.csv", + "-o", + "missing.csv", + ) + + out_mf = runtmp.output("mf.csv") assert os.path.exists(out_mf) # all should match. 
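As the parametrization above shows, the '--picklist' argument has the general form path:column:coltype, with an optional fourth field; an empty column plus coltype 'manifest' (path::manifest) matches against full manifest rows. A short sketch of the variants these tests exercise (the picklist path is illustrative):

picklist = "gather/salmonella-picklist.csv"
variants = [
    f"{picklist}::manifest",           # match rows against the whole manifest
    f"{picklist}:md5:md5prefix8",      # match the md5 column as an 8-char prefix
    f"{picklist}:name:identprefix",    # match the name column by identifier prefix
    f"{picklist}:name2:name:exclude",  # custom column name; exclude matches
]

The checks that follow confirm that all 24 signatures match each include-style picklist.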
- with open(out_mf, newline='') as fp: + with open(out_mf, newline="") as fp: mf = CollectionManifest.load_from_csv(fp) assert len(mf) == 24 idx = sourmash.load_file_as_index(out_mf) siglist = list(idx.signatures()) assert len(siglist) == 24 - ksizes = set([ ss.minhash.ksize for ss in siglist ]) + ksizes = set([ss.minhash.ksize for ss in siglist]) assert len(ksizes) == 3 assert 11 in ksizes assert 21 in ksizes @@ -4682,113 +4989,133 @@ def test_sig_check_1_column(runtmp, column, coltype): def test_sig_check_1_diff_col_name(runtmp): # 'sig check' with 'name2' column instead of default name - sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/salmonella-picklist-diffcolumn.csv') - - runtmp.sourmash('sig', 'check', *sigfiles, - "--picklist", f"{picklist}:name2:name", - "-o", "missing.csv", - '-m', 'mf.csv') - - out_mf = runtmp.output('mf.csv') + sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/salmonella-picklist-diffcolumn.csv") + + runtmp.sourmash( + "sig", + "check", + *sigfiles, + "--picklist", + f"{picklist}:name2:name", + "-o", + "missing.csv", + "-m", + "mf.csv", + ) + + out_mf = runtmp.output("mf.csv") assert os.path.exists(out_mf) - missing_csv = runtmp.output('missing.csv') + missing_csv = runtmp.output("missing.csv") assert os.path.exists(missing_csv) # should be 24 matching manifest rows - with open(out_mf, newline='') as fp: + with open(out_mf, newline="") as fp: mf = CollectionManifest.load_from_csv(fp) assert len(mf) == 24 # internal locations should match sigfile_set = set(sigfiles) for row in mf.rows: - assert row['internal_location'] in sigfile_set + assert row["internal_location"] in sigfile_set idx = sourmash.load_file_as_index(out_mf) siglist = list(idx.signatures()) assert len(siglist) == 24 - ksizes = set([ ss.minhash.ksize for ss in siglist ]) + ksizes = set([ss.minhash.ksize for ss in siglist]) assert len(ksizes) == 3 assert 11 in ksizes assert 21 in ksizes assert 31 in ksizes # should be one non-matching picklist row - with open(missing_csv, newline='') as fp: + with open(missing_csv, newline="") as fp: rows = list(csv.reader(fp)) - assert len(rows) == 2 # header row + data row - assert rows[1][0] == 'NOT THERE' + assert len(rows) == 2 # header row + data row + assert rows[1][0] == "NOT THERE" def test_sig_check_1_diff_col_name_zip(runtmp): # 'sig check' with 'name2' column instead of default name, on a zip file - sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/salmonella-picklist-diffcolumn.csv') + sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/salmonella-picklist-diffcolumn.csv") # first create a zip db - runtmp.sourmash('sig', 'cat', *sigfiles, '-o', 'gcf.zip') + runtmp.sourmash("sig", "cat", *sigfiles, "-o", "gcf.zip") # now run against this zip - runtmp.sourmash('sig', 'check', 'gcf.zip', - "--picklist", f"{picklist}:name2:name", - "-o", "missing.csv", - '-m', 'mf.csv') - - out_mf = runtmp.output('mf.csv') + runtmp.sourmash( + "sig", + "check", + "gcf.zip", + "--picklist", + f"{picklist}:name2:name", + "-o", + "missing.csv", + "-m", + "mf.csv", + ) + + out_mf = runtmp.output("mf.csv") assert os.path.exists(out_mf) - missing_csv = runtmp.output('missing.csv') + missing_csv = runtmp.output("missing.csv") assert os.path.exists(missing_csv) # should be 24 matching manifest rows - with open(out_mf, newline='') as fp: + with open(out_mf, newline="") as fp: mf 
= CollectionManifest.load_from_csv(fp) assert len(mf) == 24 # internal locations should all point to zip - ilocs = set(( row['internal_location'] for row in mf.rows )) + ilocs = set(row["internal_location"] for row in mf.rows) assert len(ilocs) == 1 # can we get 'em? idx = sourmash.load_file_as_index(out_mf) siglist = list(idx.signatures()) assert len(siglist) == 24 - ksizes = set([ ss.minhash.ksize for ss in siglist ]) + ksizes = set([ss.minhash.ksize for ss in siglist]) assert len(ksizes) == 3 assert 11 in ksizes assert 21 in ksizes assert 31 in ksizes # should be one non-matching picklist row - with open(missing_csv, newline='') as fp: + with open(missing_csv, newline="") as fp: rows = list(csv.reader(fp)) - assert len(rows) == 2 # header row + data row - assert rows[1][0] == 'NOT THERE' + assert len(rows) == 2 # header row + data row + assert rows[1][0] == "NOT THERE" def test_sig_check_1_diff_col_name_exclude(runtmp): # 'sig check' with 'name2' column, :exclude picklist - sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/salmonella-picklist-diffcolumn.csv') - - runtmp.sourmash('sig', 'check', *sigfiles, - "--picklist", f"{picklist}:name2:name:exclude", - '-m', 'mf.csv') - - out_mf = runtmp.output('mf.csv') + sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/salmonella-picklist-diffcolumn.csv") + + runtmp.sourmash( + "sig", + "check", + *sigfiles, + "--picklist", + f"{picklist}:name2:name:exclude", + "-m", + "mf.csv", + ) + + out_mf = runtmp.output("mf.csv") assert os.path.exists(out_mf) # should be 12 matching manifest rows - with open(out_mf, newline='') as fp: + with open(out_mf, newline="") as fp: mf = CollectionManifest.load_from_csv(fp) assert len(mf) == 12 idx = sourmash.load_file_as_index(out_mf) siglist = list(idx.signatures()) assert len(siglist) == 12 - ksizes = set([ ss.minhash.ksize for ss in siglist ]) + ksizes = set([ss.minhash.ksize for ss in siglist]) assert len(ksizes) == 3 assert 11 in ksizes assert 21 in ksizes @@ -4797,72 +5124,98 @@ def test_sig_check_1_diff_col_name_exclude(runtmp): def test_sig_check_1_ksize(runtmp): # basic check functionality with selection for ksize - sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/salmonella-picklist.csv') - - runtmp.sourmash('sig', 'check', *sigfiles, '-k', '31', - "--picklist", f"{picklist}::manifest", - "-m", "mf.csv") - - out_mf = runtmp.output('mf.csv') + sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/salmonella-picklist.csv") + + runtmp.sourmash( + "sig", + "check", + *sigfiles, + "-k", + "31", + "--picklist", + f"{picklist}::manifest", + "-m", + "mf.csv", + ) + + out_mf = runtmp.output("mf.csv") assert os.path.exists(out_mf) # 8 of the 24 should match. 
- with open(out_mf, newline='') as fp: + with open(out_mf, newline="") as fp: mf = CollectionManifest.load_from_csv(fp) assert len(mf) == 8 idx = sourmash.load_file_as_index(out_mf) siglist = list(idx.signatures()) assert len(siglist) == 8 - ksizes = set([ ss.minhash.ksize for ss in siglist ]) + ksizes = set([ss.minhash.ksize for ss in siglist]) assert len(ksizes) == 1 assert 31 in ksizes def test_sig_check_1_ksize_output_sql(runtmp): # basic check functionality with selection for ksize - sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/salmonella-picklist.csv') - - runtmp.sourmash('sig', 'check', *sigfiles, '-k', '31', - "--picklist", f"{picklist}::manifest", - "-m", "mf.mfsql", "-F", "sql") - - out_mf = runtmp.output('mf.mfsql') + sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/salmonella-picklist.csv") + + runtmp.sourmash( + "sig", + "check", + *sigfiles, + "-k", + "31", + "--picklist", + f"{picklist}::manifest", + "-m", + "mf.mfsql", + "-F", + "sql", + ) + + out_mf = runtmp.output("mf.mfsql") assert os.path.exists(out_mf) # 8 of the 24 should match. mf = CollectionManifest.load_from_filename(out_mf) assert len(mf) == 8 - assert mf.conn # check that it's a sqlite manifest! hacky... + assert mf.conn # check that it's a sqlite manifest! hacky... idx = sourmash.load_file_as_index(out_mf) siglist = list(idx.signatures()) assert len(siglist) == 8 - ksizes = set([ ss.minhash.ksize for ss in siglist ]) + ksizes = set([ss.minhash.ksize for ss in siglist]) assert len(ksizes) == 1 assert 31 in ksizes def test_sig_check_2_output_missing(runtmp): # output missing all as identical to input picklist - sigfiles = utils.get_test_data('gather/combined.sig') - picklist = utils.get_test_data('gather/salmonella-picklist.csv') - - runtmp.sourmash('sig', 'check', sigfiles, - "--picklist", f"{picklist}::manifest", - "-o", "missing.csv", "-m", "mf.csv") - - out_csv = runtmp.output('missing.csv') + sigfiles = utils.get_test_data("gather/combined.sig") + picklist = utils.get_test_data("gather/salmonella-picklist.csv") + + runtmp.sourmash( + "sig", + "check", + sigfiles, + "--picklist", + f"{picklist}::manifest", + "-o", + "missing.csv", + "-m", + "mf.csv", + ) + + out_csv = runtmp.output("missing.csv") assert os.path.exists(out_csv) - mf_csv = runtmp.output('mf.csv') + mf_csv = runtmp.output("mf.csv") assert not os.path.exists(mf_csv) assert "not saving matching manifest" in runtmp.last_result.err # everything is missing with 'combined.sig' - with open(out_csv, newline='') as fp: + with open(out_csv, newline="") as fp: r = csv.DictReader(fp) rows = list(r) @@ -4871,51 +5224,67 @@ def test_sig_check_2_output_missing(runtmp): def test_sig_check_2_output_missing_error_exit(runtmp): # output missing all as identical to input picklist - sigfiles = utils.get_test_data('gather/combined.sig') - picklist = utils.get_test_data('gather/salmonella-picklist.csv') + sigfiles = utils.get_test_data("gather/combined.sig") + picklist = utils.get_test_data("gather/salmonella-picklist.csv") # should error exit... with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'check', sigfiles, - "--picklist", f"{picklist}::manifest", - "-o", "missing.csv", '--fail') + runtmp.sourmash( + "sig", + "check", + sigfiles, + "--picklist", + f"{picklist}::manifest", + "-o", + "missing.csv", + "--fail", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) # ...and also output stuff! 
- out_csv = runtmp.output('missing.csv') + out_csv = runtmp.output("missing.csv") assert os.path.exists(out_csv) # everything is missing with 'combined.sig' - with open(out_csv, newline='') as fp: + with open(out_csv, newline="") as fp: r = csv.DictReader(fp) rows = list(r) assert len(rows) == 24 -@pytest.mark.parametrize("column, coltype", - (('md5', 'md5'), - ('md5', 'md5prefix8'), - ('name', 'name'), - ('name', 'ident'), - ('name', 'identprefix'), - )) +@pytest.mark.parametrize( + "column, coltype", + ( + ("md5", "md5"), + ("md5", "md5prefix8"), + ("name", "name"), + ("name", "ident"), + ("name", "identprefix"), + ), +) def test_sig_check_2_output_missing_column(runtmp, column, coltype): # output missing all as identical to input picklist - sigfiles = utils.get_test_data('gather/combined.sig') - picklist = utils.get_test_data('gather/salmonella-picklist.csv') - - runtmp.sourmash('sig', 'check', sigfiles, - "--picklist", f"{picklist}::manifest", - "-o", "missing.csv") - - out_csv = runtmp.output('missing.csv') + sigfiles = utils.get_test_data("gather/combined.sig") + picklist = utils.get_test_data("gather/salmonella-picklist.csv") + + runtmp.sourmash( + "sig", + "check", + sigfiles, + "--picklist", + f"{picklist}::manifest", + "-o", + "missing.csv", + ) + + out_csv = runtmp.output("missing.csv") assert os.path.exists(out_csv) # everything is missing with 'combined.sig' - with open(out_csv, newline='') as fp: + with open(out_csv, newline="") as fp: r = csv.DictReader(fp) rows = list(r) @@ -4924,25 +5293,33 @@ def test_sig_check_2_output_missing_column(runtmp, column, coltype): def test_sig_check_2_output_missing_exclude(runtmp): # 'exclude' with '-o' shouldn't work - sigfiles = utils.get_test_data('gather/combined.sig') - picklist = utils.get_test_data('gather/salmonella-picklist.csv') + sigfiles = utils.get_test_data("gather/combined.sig") + picklist = utils.get_test_data("gather/salmonella-picklist.csv") with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('sig', 'check', sigfiles, - "--picklist", f"{picklist}:name:name:exclude", - "-o", "missing.csv") - - assert "** ERROR: Cannot use an 'exclude' picklist with '-o/--output-missing'" in str(exc) + runtmp.sourmash( + "sig", + "check", + sigfiles, + "--picklist", + f"{picklist}:name:name:exclude", + "-o", + "missing.csv", + ) + + assert ( + "** ERROR: Cannot use an 'exclude' picklist with '-o/--output-missing'" + in str(exc) + ) def test_sig_check_3_no_manifest(runtmp): # fail check when no manifest, by default - sbt = utils.get_test_data('v6.sbt.zip') - picklist = utils.get_test_data('v6.sbt.zip.mf.csv') + sbt = utils.get_test_data("v6.sbt.zip") + picklist = utils.get_test_data("v6.sbt.zip.mf.csv") - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('sig', 'check', sbt, - '--picklist', f"{picklist}::manifest") + with pytest.raises(SourmashCommandFailed): + runtmp.run_sourmash("sig", "check", sbt, "--picklist", f"{picklist}::manifest") print(runtmp.last_result.out) print(runtmp.last_result.err) @@ -4953,12 +5330,21 @@ def test_sig_check_3_no_manifest(runtmp): def test_sig_check_3_no_manifest_ok(runtmp): # generate manifest if --no-require-manifest - sbt = utils.get_test_data('v6.sbt.zip') - picklist = utils.get_test_data('v6.sbt.zip.mf.csv') - - runtmp.run_sourmash('sig', 'check', sbt, "--no-require-manifest", - '--picklist', f"{picklist}::manifest") + sbt = utils.get_test_data("v6.sbt.zip") + picklist = utils.get_test_data("v6.sbt.zip.mf.csv") + + runtmp.run_sourmash( + "sig", + "check", + sbt, + 
"--no-require-manifest", + "--picklist", + f"{picklist}::manifest", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert "for given picklist, found 7 matches to 7 distinct values" in runtmp.last_result.err + assert ( + "for given picklist, found 7 matches to 7 distinct values" + in runtmp.last_result.err + ) diff --git a/tests/test_cmd_signature_collect.py b/tests/test_cmd_signature_collect.py index 61f703080f..edd7c16a29 100644 --- a/tests/test_cmd_signature_collect.py +++ b/tests/test_cmd_signature_collect.py @@ -15,13 +15,13 @@ def test_sig_collect_0_nothing(runtmp, manifest_db_format): # run with just output - ext = 'sqlmf' if manifest_db_format == 'sql' else 'csv' - if manifest_db_format != 'sql': return + ext = "sqlmf" if manifest_db_format == "sql" else "csv" + if manifest_db_format != "sql": + return - runtmp.sourmash('sig', 'collect', '-o', f'mf.{ext}', - '-F', manifest_db_format) + runtmp.sourmash("sig", "collect", "-o", f"mf.{ext}", "-F", manifest_db_format) - manifest_fn = runtmp.output(f'mf.{ext}') + manifest_fn = runtmp.output(f"mf.{ext}") manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 0 @@ -29,124 +29,125 @@ def test_sig_collect_0_nothing(runtmp, manifest_db_format): def test_sig_collect_1_zipfile(runtmp, manifest_db_format): # collect a manifest from a .zip file - protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") - ext = 'sqlmf' if manifest_db_format == 'sql' else 'csv' + ext = "sqlmf" if manifest_db_format == "sql" else "csv" - runtmp.sourmash('sig', 'collect', protzip, '-o', f'mf.{ext}', - '-F', manifest_db_format) + runtmp.sourmash( + "sig", "collect", protzip, "-o", f"mf.{ext}", "-F", manifest_db_format + ) - manifest_fn = runtmp.output(f'mf.{ext}') + manifest_fn = runtmp.output(f"mf.{ext}") manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list def test_sig_collect_1_zipfile_csv_gz(runtmp): # collect a manifest from a .zip file, save to csv.gz - protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") - runtmp.sourmash('sig', 'collect', protzip, '-o', 'mf.csv.gz', - '-F', 'csv') + runtmp.sourmash("sig", "collect", protzip, "-o", "mf.csv.gz", "-F", "csv") - manifest_fn = runtmp.output('mf.csv.gz') + manifest_fn = runtmp.output("mf.csv.gz") # gzip, yes? 
- print('XXX', manifest_fn) - with gzip.open(manifest_fn, 'rt', newline='') as fp: + print("XXX", manifest_fn) + with gzip.open(manifest_fn, "rt", newline="") as fp: fp.read() manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list def test_sig_collect_1_zipfile_csv_gz_roundtrip(runtmp): # collect a manifest from a .zip file, save to csv.gz; then load again - protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") - runtmp.sourmash('sig', 'collect', protzip, '-o', 'mf.csv.gz', - '-F', 'csv') + runtmp.sourmash("sig", "collect", protzip, "-o", "mf.csv.gz", "-F", "csv") - manifest_fn = runtmp.output('mf.csv.gz') + manifest_fn = runtmp.output("mf.csv.gz") # gzip, yes? - print('XXX', manifest_fn) - with gzip.open(manifest_fn, 'rt', newline='') as fp: + print("XXX", manifest_fn) + with gzip.open(manifest_fn, "rt", newline="") as fp: fp.read() manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list # can we read a csv.gz? - runtmp.sourmash('sig', 'collect', 'mf.csv.gz', '-o', 'mf2.csv', - '-F', 'csv') + runtmp.sourmash("sig", "collect", "mf.csv.gz", "-o", "mf2.csv", "-F", "csv") - manifest_fn2 = runtmp.output('mf2.csv') + manifest_fn2 = runtmp.output("mf2.csv") manifest2 = BaseCollectionManifest.load_from_filename(manifest_fn2) assert len(manifest2) == 2 - md5_list = [ row['md5'] for row in manifest2.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list - + md5_list = [row["md5"] for row in manifest2.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list def test_sig_collect_2_exists_fail(runtmp, manifest_db_format): # collect a manifest from two .zip files - protzip = utils.get_test_data('prot/protein.zip') - allzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") + allzip = utils.get_test_data("prot/protein.zip") - ext = 'sqlmf' if manifest_db_format == 'sql' else 'csv' + ext = "sqlmf" if manifest_db_format == "sql" else "csv" - runtmp.sourmash('sig', 'collect', protzip, '-o', f'mf.{ext}', - '-F', manifest_db_format) + runtmp.sourmash( + "sig", "collect", protzip, "-o", f"mf.{ext}", "-F", manifest_db_format + ) - manifest_fn = runtmp.output(f'mf.{ext}') + manifest_fn = runtmp.output(f"mf.{ext}") manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list # now run with same filename - should fail with 
pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'collect', allzip, '-o', manifest_fn, - '-F', manifest_db_format) + runtmp.sourmash( + "sig", "collect", allzip, "-o", manifest_fn, "-F", manifest_db_format + ) def test_sig_collect_2_exists_merge(runtmp, manifest_db_format): # collect a manifest from two .zip files - protzip = utils.get_test_data('prot/protein.zip') - allzip = utils.get_test_data('prot/all.zip') + protzip = utils.get_test_data("prot/protein.zip") + allzip = utils.get_test_data("prot/all.zip") - ext = 'sqlmf' if manifest_db_format == 'sql' else 'csv' + ext = "sqlmf" if manifest_db_format == "sql" else "csv" - runtmp.sourmash('sig', 'collect', protzip, '-o', f'mf.{ext}', - '-F', manifest_db_format) + runtmp.sourmash( + "sig", "collect", protzip, "-o", f"mf.{ext}", "-F", manifest_db_format + ) - manifest_fn = runtmp.output(f'mf.{ext}') + manifest_fn = runtmp.output(f"mf.{ext}") manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list # now run with same filename - should merge - runtmp.sourmash('sig', 'collect', allzip, '-o', manifest_fn, - '-F', manifest_db_format, '--merge') + runtmp.sourmash( + "sig", "collect", allzip, "-o", manifest_fn, "-F", manifest_db_format, "--merge" + ) manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 10 @@ -154,67 +155,68 @@ def test_sig_collect_2_exists_merge(runtmp, manifest_db_format): def test_sig_collect_2_exists_sql_merge_csv(runtmp, manifest_db_format): # try to merge csv into sql - protzip = utils.get_test_data('prot/protein.zip') - allzip = utils.get_test_data('prot/all.zip') + protzip = utils.get_test_data("prot/protein.zip") + allzip = utils.get_test_data("prot/all.zip") - ext = 'sqlmf' + ext = "sqlmf" # save as sql... - runtmp.sourmash('sig', 'collect', protzip, '-o', f'mf.{ext}', - '-F', 'sql') + runtmp.sourmash("sig", "collect", protzip, "-o", f"mf.{ext}", "-F", "sql") - manifest_fn = runtmp.output(f'mf.{ext}') + manifest_fn = runtmp.output(f"mf.{ext}") manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'collect', allzip, '-o', manifest_fn, - '-F', 'csv', '--merge') + runtmp.sourmash( + "sig", "collect", allzip, "-o", manifest_fn, "-F", "csv", "--merge" + ) assert "ERROR loading" in runtmp.last_result.err def test_sig_collect_2_exists_csv_merge_sql(runtmp): # try to merge sql into csv - protzip = utils.get_test_data('prot/protein.zip') - allzip = utils.get_test_data('prot/all.zip') + protzip = utils.get_test_data("prot/protein.zip") + allzip = utils.get_test_data("prot/all.zip") - ext = 'csv' + ext = "csv" # save as csv... 
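    # note: '--merge' seems to require that the existing output load in the
    # format given by '-F'; the manifest on disk here is CSV, so merging with
    # '-F sql' should fail with the "ERROR loading" message checked below.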
- runtmp.sourmash('sig', 'collect', protzip, '-o', f'mf.{ext}', - '-F', 'csv') + runtmp.sourmash("sig", "collect", protzip, "-o", f"mf.{ext}", "-F", "csv") - manifest_fn = runtmp.output(f'mf.{ext}') + manifest_fn = runtmp.output(f"mf.{ext}") manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'collect', allzip, '-o', manifest_fn, - '-F', 'sql', '--merge') + runtmp.sourmash( + "sig", "collect", allzip, "-o", manifest_fn, "-F", "sql", "--merge" + ) assert "ERROR loading" in runtmp.last_result.err def test_sig_collect_2_no_exists_merge(runtmp, manifest_db_format): # test 'merge' when args.output doesn't already exist => warning - protzip = utils.get_test_data('prot/protein.zip') - allzip = utils.get_test_data('prot/all.zip') + utils.get_test_data("prot/protein.zip") + allzip = utils.get_test_data("prot/all.zip") - ext = 'sqlmf' if manifest_db_format == 'sql' else 'csv' - manifest_fn = runtmp.output(f'mf.{ext}') + ext = "sqlmf" if manifest_db_format == "sql" else "csv" + manifest_fn = runtmp.output(f"mf.{ext}") # run with --merge but no previous: - runtmp.sourmash('sig', 'collect', allzip, '-o', manifest_fn, - '-F', manifest_db_format, '--merge') + runtmp.sourmash( + "sig", "collect", allzip, "-o", manifest_fn, "-F", manifest_db_format, "--merge" + ) manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 8 @@ -226,28 +228,37 @@ def test_sig_collect_2_no_exists_merge(runtmp, manifest_db_format): def test_sig_collect_3_multiple(runtmp, manifest_db_format): # collect a manifest from two .zip files - protzip = utils.get_test_data('prot/protein.zip') - hpzip = utils.get_test_data('prot/hp.zip') - dayzip = utils.get_test_data('prot/dayhoff.zip') - - ext = 'sqlmf' if manifest_db_format == 'sql' else 'csv' - - runtmp.sourmash('sig', 'collect', protzip, hpzip, dayzip, - '-o', f'mf.{ext}', '-F', manifest_db_format) - - manifest_fn = runtmp.output(f'mf.{ext}') + protzip = utils.get_test_data("prot/protein.zip") + hpzip = utils.get_test_data("prot/hp.zip") + dayzip = utils.get_test_data("prot/dayhoff.zip") + + ext = "sqlmf" if manifest_db_format == "sql" else "csv" + + runtmp.sourmash( + "sig", + "collect", + protzip, + hpzip, + dayzip, + "-o", + f"mf.{ext}", + "-F", + manifest_db_format, + ) + + manifest_fn = runtmp.output(f"mf.{ext}") manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 6 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list - assert 'ea2a1ad233c2908529d124a330bcb672' in md5_list - assert 'bb0e6d90df01b7bd5d0956a5f9e3ed12' in md5_list - assert 'fbca5e5211e4d58427997fd5c8343e9a' in md5_list - assert '1cbd888bf910f83ad8f1715509183223' in md5_list - - locations = set([ row['internal_location'] for row in manifest.rows ]) + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list + assert "ea2a1ad233c2908529d124a330bcb672" in md5_list + assert "bb0e6d90df01b7bd5d0956a5f9e3ed12" in 
md5_list + assert "fbca5e5211e4d58427997fd5c8343e9a" in md5_list + assert "1cbd888bf910f83ad8f1715509183223" in md5_list + + locations = set([row["internal_location"] for row in manifest.rows]) assert protzip in locations assert hpzip in locations assert dayzip in locations @@ -256,34 +267,42 @@ def test_sig_collect_3_multiple(runtmp, manifest_db_format): def test_sig_collect_3_multiple_use_fromfile(runtmp, manifest_db_format): # collect a manifest from two .zip files using --from-file - protzip = utils.get_test_data('prot/protein.zip') - hpzip = utils.get_test_data('prot/hp.zip') - dayzip = utils.get_test_data('prot/dayhoff.zip') + protzip = utils.get_test_data("prot/protein.zip") + hpzip = utils.get_test_data("prot/hp.zip") + dayzip = utils.get_test_data("prot/dayhoff.zip") - ext = 'sqlmf' if manifest_db_format == 'sql' else 'csv' + ext = "sqlmf" if manifest_db_format == "sql" else "csv" - fromfile = runtmp.output('fromfile.txt') - with open(fromfile, 'wt') as fp: + fromfile = runtmp.output("fromfile.txt") + with open(fromfile, "w") as fp: print(protzip, file=fp) print(hpzip, file=fp) print(dayzip, file=fp) - runtmp.sourmash('sig', 'collect', '--from-file', 'fromfile.txt', - '-o', f'mf.{ext}', '-F', manifest_db_format) - - manifest_fn = runtmp.output(f'mf.{ext}') + runtmp.sourmash( + "sig", + "collect", + "--from-file", + "fromfile.txt", + "-o", + f"mf.{ext}", + "-F", + manifest_db_format, + ) + + manifest_fn = runtmp.output(f"mf.{ext}") manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 6 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list - assert 'ea2a1ad233c2908529d124a330bcb672' in md5_list - assert 'bb0e6d90df01b7bd5d0956a5f9e3ed12' in md5_list - assert 'fbca5e5211e4d58427997fd5c8343e9a' in md5_list - assert '1cbd888bf910f83ad8f1715509183223' in md5_list - - locations = set([ row['internal_location'] for row in manifest.rows ]) + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list + assert "ea2a1ad233c2908529d124a330bcb672" in md5_list + assert "bb0e6d90df01b7bd5d0956a5f9e3ed12" in md5_list + assert "fbca5e5211e4d58427997fd5c8343e9a" in md5_list + assert "1cbd888bf910f83ad8f1715509183223" in md5_list + + locations = set([row["internal_location"] for row in manifest.rows]) assert protzip in locations assert hpzip in locations assert dayzip in locations @@ -292,23 +311,24 @@ def test_sig_collect_3_multiple_use_fromfile(runtmp, manifest_db_format): def test_sig_collect_4_multiple_from_sig(runtmp, manifest_db_format): # collect a manifest from sig files - sig43 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig43 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") - ext = 'sqlmf' if manifest_db_format == 'sql' else 'csv' + ext = "sqlmf" if manifest_db_format == "sql" else "csv" - runtmp.sourmash('sig', 'collect', sig43, sig63, - '-o', f'mf.{ext}', '-F', manifest_db_format) + runtmp.sourmash( + "sig", "collect", sig43, sig63, "-o", f"mf.{ext}", "-F", manifest_db_format + ) - manifest_fn = runtmp.output(f'mf.{ext}') + manifest_fn = runtmp.output(f"mf.{ext}") manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '09a08691ce52952152f0e866a59f6261' in md5_list - assert 
'38729c6374925585db28916b82a6f513' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "09a08691ce52952152f0e866a59f6261" in md5_list + assert "38729c6374925585db28916b82a6f513" in md5_list - locations = set([ row['internal_location'] for row in manifest.rows ]) + locations = set([row["internal_location"] for row in manifest.rows]) assert sig43 in locations assert sig63 in locations assert len(locations) == 2, locations @@ -316,89 +336,115 @@ def test_sig_collect_4_multiple_from_sig(runtmp, manifest_db_format): def test_sig_collect_4_multiple_from_sig_abspath(runtmp, manifest_db_format): # collect a manifest from sig files, forcing abspath - sig43 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - - shutil.copyfile(sig43, runtmp.output('47.fa.sig')) - shutil.copyfile(sig63, runtmp.output('63.fa.sig')) - - ext = 'sqlmf' if manifest_db_format == 'sql' else 'csv' - - runtmp.sourmash('sig', 'collect', '47.fa.sig', '63.fa.sig', '--abspath', - '-o', f'mf.{ext}', '-F', manifest_db_format) + sig43 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + + shutil.copyfile(sig43, runtmp.output("47.fa.sig")) + shutil.copyfile(sig63, runtmp.output("63.fa.sig")) + + ext = "sqlmf" if manifest_db_format == "sql" else "csv" + + runtmp.sourmash( + "sig", + "collect", + "47.fa.sig", + "63.fa.sig", + "--abspath", + "-o", + f"mf.{ext}", + "-F", + manifest_db_format, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - manifest_fn = runtmp.output(f'mf.{ext}') + manifest_fn = runtmp.output(f"mf.{ext}") manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '09a08691ce52952152f0e866a59f6261' in md5_list - assert '38729c6374925585db28916b82a6f513' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "09a08691ce52952152f0e866a59f6261" in md5_list + assert "38729c6374925585db28916b82a6f513" in md5_list - locations = set([ row['internal_location'] for row in manifest.rows ]) + locations = set([row["internal_location"] for row in manifest.rows]) print(locations) assert len(locations) == 2, locations for xx in locations: - assert xx.startswith('/') + assert xx.startswith("/") def test_sig_collect_4_multiple_no_abspath(runtmp, manifest_db_format): # collect a manifest from sig files, no abspath - sig43 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig43 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") # copy files to tmp, where they will not have full paths - shutil.copyfile(sig43, runtmp.output('47.fa.sig')) - shutil.copyfile(sig63, runtmp.output('63.fa.sig')) - - ext = 'sqlmf' if manifest_db_format == 'sql' else 'csv' - - runtmp.sourmash('sig', 'collect', '47.fa.sig', '63.fa.sig', - '-o', f'mf.{ext}', '-F', manifest_db_format) - - manifest_fn = runtmp.output(f'mf.{ext}') + shutil.copyfile(sig43, runtmp.output("47.fa.sig")) + shutil.copyfile(sig63, runtmp.output("63.fa.sig")) + + ext = "sqlmf" if manifest_db_format == "sql" else "csv" + + runtmp.sourmash( + "sig", + "collect", + "47.fa.sig", + "63.fa.sig", + "-o", + f"mf.{ext}", + "-F", + manifest_db_format, + ) + + manifest_fn = runtmp.output(f"mf.{ext}") manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '09a08691ce52952152f0e866a59f6261' in md5_list - assert '38729c6374925585db28916b82a6f513' in 
md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "09a08691ce52952152f0e866a59f6261" in md5_list + assert "38729c6374925585db28916b82a6f513" in md5_list - locations = set([ row['internal_location'] for row in manifest.rows ]) + locations = set([row["internal_location"] for row in manifest.rows]) print(locations) assert len(locations) == 2, locations - assert '47.fa.sig' in locations - assert '63.fa.sig' in locations + assert "47.fa.sig" in locations + assert "63.fa.sig" in locations def test_sig_collect_5_no_manifest_sbt_fail(runtmp, manifest_db_format): # collect a manifest from files that don't have one - sbt_zip = utils.get_test_data('v6.sbt.zip') + sbt_zip = utils.get_test_data("v6.sbt.zip") - ext = 'sqlmf' if manifest_db_format == 'sql' else 'csv' + ext = "sqlmf" if manifest_db_format == "sql" else "csv" with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'collect', sbt_zip, - '-o', f'mf.{ext}', '-F', manifest_db_format) + runtmp.sourmash( + "sig", "collect", sbt_zip, "-o", f"mf.{ext}", "-F", manifest_db_format + ) def test_sig_collect_5_no_manifest_sbt_succeed(runtmp, manifest_db_format): # generate a manifest from files that don't have one when --no-require - sbt_zip = utils.get_test_data('v6.sbt.zip') - - ext = 'sqlmf' if manifest_db_format == 'sql' else 'csv' - - runtmp.sourmash('sig', 'collect', sbt_zip, '--no-require-manifest', - '-o', f'mf.{ext}', '-F', manifest_db_format) - - manifest_fn = runtmp.output(f'mf.{ext}') + sbt_zip = utils.get_test_data("v6.sbt.zip") + + ext = "sqlmf" if manifest_db_format == "sql" else "csv" + + runtmp.sourmash( + "sig", + "collect", + sbt_zip, + "--no-require-manifest", + "-o", + f"mf.{ext}", + "-F", + manifest_db_format, + ) + + manifest_fn = runtmp.output(f"mf.{ext}") manifest = BaseCollectionManifest.load_from_filename(manifest_fn) assert len(manifest) == 7 - locations = set([ row['internal_location'] for row in manifest.rows ]) + locations = set([row["internal_location"] for row in manifest.rows]) assert len(locations) == 1, locations assert sbt_zip in locations diff --git a/tests/test_cmd_signature_fileinfo.py b/tests/test_cmd_signature_fileinfo.py index 33bd649748..25e29a5b4f 100644 --- a/tests/test_cmd_signature_fileinfo.py +++ b/tests/test_cmd_signature_fileinfo.py @@ -16,10 +16,10 @@ def test_fileinfo_1_sig(runtmp): # get basic info on a signature - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") - shutil.copyfile(sig47, runtmp.output('sig47.sig')) - runtmp.run_sourmash('sig', 'fileinfo', 'sig47.sig') + shutil.copyfile(sig47, runtmp.output("sig47.sig")) + runtmp.run_sourmash("sig", "fileinfo", "sig47.sig") out = runtmp.last_result.out print(runtmp.last_result.out) @@ -40,10 +40,10 @@ def test_fileinfo_1_sig(runtmp): def test_fileinfo_1_sig_summarize(runtmp): # get basic info on a signature with 'summarize' as alias for fileinfo - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") - shutil.copyfile(sig47, runtmp.output('sig47.sig')) - runtmp.run_sourmash('sig', 'summarize', 'sig47.sig') + shutil.copyfile(sig47, runtmp.output("sig47.sig")) + runtmp.run_sourmash("sig", "summarize", "sig47.sig") out = runtmp.last_result.out print(runtmp.last_result.out) @@ -64,10 +64,10 @@ def test_fileinfo_1_sig_summarize(runtmp): def test_fileinfo_1_sig_abund(runtmp): # get basic info on a signature with abundance - sig47 = utils.get_test_data('track_abund/47.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") - shutil.copyfile(sig47, 
runtmp.output('sig47.sig')) - runtmp.run_sourmash('sig', 'fileinfo', 'sig47.sig') + shutil.copyfile(sig47, runtmp.output("sig47.sig")) + runtmp.run_sourmash("sig", "fileinfo", "sig47.sig") out = runtmp.last_result.out print(runtmp.last_result.out) @@ -88,10 +88,10 @@ def test_fileinfo_1_sig_abund(runtmp): def test_fileinfo_2_lca(runtmp): # get basic info on an LCA database - prot = utils.get_test_data('prot/protein.lca.json.gz') + prot = utils.get_test_data("prot/protein.lca.json.gz") - shutil.copyfile(prot, runtmp.output('protein.lca.json.gz')) - runtmp.run_sourmash('sig', 'fileinfo', 'protein.lca.json.gz') + shutil.copyfile(prot, runtmp.output("protein.lca.json.gz")) + runtmp.run_sourmash("sig", "fileinfo", "protein.lca.json.gz") out = runtmp.last_result.out print(runtmp.last_result.out) @@ -112,10 +112,10 @@ def test_fileinfo_2_lca(runtmp): def test_fileinfo_3_sbt_zip(runtmp): # test on an SBT.zip - prot = utils.get_test_data('prot/protein.sbt.zip') + prot = utils.get_test_data("prot/protein.sbt.zip") - shutil.copyfile(prot, runtmp.output('protein.sbt.zip')) - runtmp.run_sourmash('sig', 'fileinfo', 'protein.sbt.zip') + shutil.copyfile(prot, runtmp.output("protein.sbt.zip")) + runtmp.run_sourmash("sig", "fileinfo", "protein.sbt.zip") out = runtmp.last_result.out print(runtmp.last_result.out) @@ -136,10 +136,10 @@ def test_fileinfo_3_sbt_zip(runtmp): def test_fileinfo_4_zip(runtmp): # test on a ZipFileLinearIndex - prot = utils.get_test_data('prot/all.zip') + prot = utils.get_test_data("prot/all.zip") - shutil.copyfile(prot, runtmp.output('all.zip')) - runtmp.run_sourmash('sig', 'fileinfo', 'all.zip') + shutil.copyfile(prot, runtmp.output("all.zip")) + runtmp.run_sourmash("sig", "fileinfo", "all.zip") out = runtmp.last_result.out print(runtmp.last_result.out) @@ -163,10 +163,10 @@ def test_fileinfo_4_zip(runtmp): def test_fileinfo_4_zip_json_out(runtmp): # check --json-out - prot = utils.get_test_data('prot/all.zip') + prot = utils.get_test_data("prot/all.zip") - shutil.copyfile(prot, runtmp.output('all.zip')) - runtmp.run_sourmash('sig', 'fileinfo', 'all.zip', '--json-out') + shutil.copyfile(prot, runtmp.output("all.zip")) + runtmp.run_sourmash("sig", "fileinfo", "all.zip", "--json-out") out = runtmp.last_result.out print(runtmp.last_result.out) @@ -174,30 +174,62 @@ def test_fileinfo_4_zip_json_out(runtmp): # should succeed as loading as JSON, with correct info vals = json.loads(out) - assert vals['has_manifest'] - assert vals['is_database'] - assert vals['num_sketches'] == 8 - assert vals['path_filetype'] == 'ZipFileLinearIndex' - assert vals['total_hashes'] == 31758 - - d1 = {'ksize': 19, 'moltype': 'dayhoff', 'scaled': 100, 'num': 0, 'abund': False, 'count': 2, 'n_hashes': 7945} - d2 = {'ksize': 19, 'moltype': 'hp', 'scaled': 100, 'num': 0, 'abund': False, 'count': 2, 'n_hashes': 5184} - d3 = {'ksize': 19, 'moltype': 'protein', 'scaled': 100, 'num': 0, 'abund': False, 'count': 2, 'n_hashes': 8214} - d4 = {'ksize': 31, 'moltype': 'DNA', 'scaled': 1000, 'num': 0, 'abund': False, 'count': 2, 'n_hashes': 10415} - - assert d1 in vals['sketch_info'] - assert d2 in vals['sketch_info'] - assert d3 in vals['sketch_info'] - assert d4 in vals['sketch_info'] - assert len(vals['sketch_info']) == 4 + assert vals["has_manifest"] + assert vals["is_database"] + assert vals["num_sketches"] == 8 + assert vals["path_filetype"] == "ZipFileLinearIndex" + assert vals["total_hashes"] == 31758 + + d1 = { + "ksize": 19, + "moltype": "dayhoff", + "scaled": 100, + "num": 0, + "abund": False, + "count": 2, + 
"n_hashes": 7945, + } + d2 = { + "ksize": 19, + "moltype": "hp", + "scaled": 100, + "num": 0, + "abund": False, + "count": 2, + "n_hashes": 5184, + } + d3 = { + "ksize": 19, + "moltype": "protein", + "scaled": 100, + "num": 0, + "abund": False, + "count": 2, + "n_hashes": 8214, + } + d4 = { + "ksize": 31, + "moltype": "DNA", + "scaled": 1000, + "num": 0, + "abund": False, + "count": 2, + "n_hashes": 10415, + } + + assert d1 in vals["sketch_info"] + assert d2 in vals["sketch_info"] + assert d3 in vals["sketch_info"] + assert d4 in vals["sketch_info"] + assert len(vals["sketch_info"]) == 4 def test_fileinfo_4_zip_rebuild(runtmp): # test --rebuild - prot = utils.get_test_data('prot/all.zip') + prot = utils.get_test_data("prot/all.zip") - shutil.copyfile(prot, runtmp.output('all.zip')) - runtmp.run_sourmash('sig', 'fileinfo', 'all.zip', '--rebuild') + shutil.copyfile(prot, runtmp.output("all.zip")) + runtmp.run_sourmash("sig", "fileinfo", "all.zip", "--rebuild") out = runtmp.last_result.out print(runtmp.last_result.out) @@ -224,12 +256,12 @@ def test_fileinfo_4_zip_rebuild(runtmp): def test_fileinfo_5_dir(runtmp): # test on a directory - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") - os.mkdir(runtmp.output('subdir')) + os.mkdir(runtmp.output("subdir")) - shutil.copyfile(sig47, runtmp.output('subdir/sig47.sig')) - runtmp.run_sourmash('sig', 'fileinfo', 'subdir/') + shutil.copyfile(sig47, runtmp.output("subdir/sig47.sig")) + runtmp.run_sourmash("sig", "fileinfo", "subdir/") out = runtmp.last_result.out print(runtmp.last_result.out) @@ -250,13 +282,13 @@ def test_fileinfo_5_dir(runtmp): def test_fileinfo_6_pathlist(runtmp): # test on a pathlist - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") shutil.copyfile(sig47, runtmp.output("47.fa.sig")) - with open(runtmp.output('pathlist.txt'), 'wt') as fp: + with open(runtmp.output("pathlist.txt"), "w") as fp: fp.write("47.fa.sig\n") - runtmp.run_sourmash('sig', 'fileinfo', 'pathlist.txt') + runtmp.run_sourmash("sig", "fileinfo", "pathlist.txt") out = runtmp.last_result.out print(runtmp.last_result.out) @@ -275,13 +307,22 @@ def test_fileinfo_6_pathlist(runtmp): assert line.strip() in out -@pytest.mark.parametrize("db", ['v6.sbt.json', 'v5.sbt.json', 'v4.sbt.json', - 'v3.sbt.json', 'v2.sbt.json', 'v1.sbt.json']) +@pytest.mark.parametrize( + "db", + [ + "v6.sbt.json", + "v5.sbt.json", + "v4.sbt.json", + "v3.sbt.json", + "v2.sbt.json", + "v1.sbt.json", + ], +) def test_fileinfo_7_sbt_json(runtmp, db): # test on multiple versions of SBT JSON files dbfile = utils.get_test_data(db) - runtmp.run_sourmash('sig', 'fileinfo', dbfile) + runtmp.run_sourmash("sig", "fileinfo", dbfile) out = runtmp.last_result.out print(runtmp.last_result.out) @@ -302,11 +343,13 @@ def test_fileinfo_7_sbt_json(runtmp, db): def test_sig_fileinfo_stdin(runtmp): # test on stdin - sig = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - with open(sig, 'rt') as fp: + sig = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + with open(sig) as fp: data = fp.read() - runtmp.run_sourmash('sig', 'fileinfo', '-', stdin_data=data) + runtmp.run_sourmash("sig", "fileinfo", "-", stdin_data=data) out = runtmp.last_result.out print(out) @@ -328,53 +371,56 @@ def test_sig_fileinfo_stdin(runtmp): def test_sig_fileinfo_does_not_exist(runtmp): # test on file that does not exist with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('sig', 
'fileinfo', 'does-not-exist') + runtmp.run_sourmash("sig", "fileinfo", "does-not-exist") - assert "Cannot open 'does-not-exist' as a sourmash signature collection" in runtmp.last_result.err + assert ( + "Cannot open 'does-not-exist' as a sourmash signature collection" + in runtmp.last_result.err + ) def test_sig_fileinfo_8_manifest_works(runtmp): # test on a manifest with relative paths, in proper location - mf = utils.get_test_data('scaled/mf.csv') - runtmp.sourmash('sig', 'fileinfo', mf) + mf = utils.get_test_data("scaled/mf.csv") + runtmp.sourmash("sig", "fileinfo", mf) out = runtmp.last_result.out print(out) - assert '15 sketches with DNA, k=31, scaled=10000 717 total hashes' in out - assert 'num signatures: 15' in out - assert 'has manifest? yes' in out - assert 'is database? yes' in out - assert 'path filetype: StandaloneManifestIndex' in out + assert "15 sketches with DNA, k=31, scaled=10000 717 total hashes" in out + assert "num signatures: 15" in out + assert "has manifest? yes" in out + assert "is database? yes" in out + assert "path filetype: StandaloneManifestIndex" in out def test_sig_fileinfo_8_manifest_works_when_moved(runtmp): # test on a manifest with relative paths, when in wrong place # note: this works, unlike 'describe', because all the necessary info # for 'fileinfo' is in the manifest. - mf = utils.get_test_data('scaled/mf.csv') - shutil.copyfile(mf, runtmp.output('mf.csv')) + mf = utils.get_test_data("scaled/mf.csv") + shutil.copyfile(mf, runtmp.output("mf.csv")) - runtmp.sourmash('sig', 'fileinfo', 'mf.csv') + runtmp.sourmash("sig", "fileinfo", "mf.csv") out = runtmp.last_result.out print(out) - assert '15 sketches with DNA, k=31, scaled=10000 717 total hashes' in out - assert 'num signatures: 15' in out - assert 'has manifest? yes' in out - assert 'is database? yes' in out - assert 'path filetype: StandaloneManifestIndex' in out + assert "15 sketches with DNA, k=31, scaled=10000 717 total hashes" in out + assert "num signatures: 15" in out + assert "has manifest? yes" in out + assert "is database? 
yes" in out + assert "path filetype: StandaloneManifestIndex" in out def test_sig_fileinfo_9_sqldb_make(runtmp): # make a sqldb and run fileinfo on it - gcf_all = glob.glob(utils.get_test_data('gather/GCF*.sig')) - sqldb = runtmp.output('some.sqldb') + gcf_all = glob.glob(utils.get_test_data("gather/GCF*.sig")) + sqldb = runtmp.output("some.sqldb") - runtmp.sourmash('sig', 'cat', '-k', '31', *gcf_all, '-o', sqldb) + runtmp.sourmash("sig", "cat", "-k", "31", *gcf_all, "-o", sqldb) - runtmp.sourmash('sig', 'fileinfo', sqldb) + runtmp.sourmash("sig", "fileinfo", sqldb) err = runtmp.last_result.err print(err) @@ -387,8 +433,8 @@ def test_sig_fileinfo_9_sqldb_make(runtmp): def test_sig_fileinfo_9_sqldb_exists(runtmp): # run fileinfo on existing sqldb - sqldb = utils.get_test_data('sqlite/index.sqldb') - runtmp.sourmash('sig', 'fileinfo', sqldb) + sqldb = utils.get_test_data("sqlite/index.sqldb") + runtmp.sourmash("sig", "fileinfo", sqldb) err = runtmp.last_result.err print(err) @@ -397,13 +443,15 @@ def test_sig_fileinfo_9_sqldb_exists(runtmp): print(out) assert "path filetype: SqliteIndex" in out - assert "2 sketches with DNA, k=31, scaled=1000 10415 total hashes" in out + assert ( + "2 sketches with DNA, k=31, scaled=1000 10415 total hashes" in out + ) def test_sig_fileinfo_9_sql_manifest(runtmp): # run fileinfo on existing sqldb - sqldb = utils.get_test_data('sqlite/prot.sqlmf') - runtmp.sourmash('sig', 'fileinfo', sqldb) + sqldb = utils.get_test_data("sqlite/prot.sqlmf") + runtmp.sourmash("sig", "fileinfo", sqldb) err = runtmp.last_result.err print(err) @@ -421,8 +469,8 @@ def test_sig_fileinfo_9_sql_manifest(runtmp): def test_sig_fileinfo_9_sql_lca_db(runtmp): # run fileinfo on existing sqldb - sqldb = utils.get_test_data('sqlite/lca.sqldb') - runtmp.sourmash('sig', 'fileinfo', sqldb) + sqldb = utils.get_test_data("sqlite/lca.sqldb") + runtmp.sourmash("sig", "fileinfo", sqldb) err = runtmp.last_result.err print(err) diff --git a/tests/test_cmd_signature_grep.py b/tests/test_cmd_signature_grep.py index 17dd5ee2dc..fa1a5b7dfb 100644 --- a/tests/test_cmd_signature_grep.py +++ b/tests/test_cmd_signature_grep.py @@ -18,299 +18,308 @@ def test_grep_1_sig_name(runtmp): # search on substring in name - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") - runtmp.run_sourmash('sig', 'grep', 'Shewanella', sig47) + runtmp.run_sourmash("sig", "grep", "Shewanella", sig47) out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella' in ss.name - assert ss.md5sum() == '09a08691ce52952152f0e866a59f6261' + assert "Shewanella" in ss.name + assert ss.md5sum() == "09a08691ce52952152f0e866a59f6261" def test_grep_1_sig_name_case_sensitive(runtmp): # search on substring in name - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('sig', 'grep', 'shewanella', sig47) + runtmp.run_sourmash("sig", "grep", "shewanella", sig47) def test_grep_1_sig_name_case_insensitive(runtmp): # search on substring in name, case insensitive - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") - runtmp.run_sourmash('sig', 'grep', '-i', 'shewanella', sig47) + runtmp.run_sourmash("sig", "grep", "-i", "shewanella", sig47) out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella' in ss.name - assert ss.md5sum() == '09a08691ce52952152f0e866a59f6261' + assert 
"Shewanella" in ss.name + assert ss.md5sum() == "09a08691ce52952152f0e866a59f6261" def test_grep_1_sig_name_exclude(runtmp): # search on substring in name, case insensitive - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") # no matches! with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('sig', 'grep', '-v', 'Shewanella', sig47) + runtmp.run_sourmash("sig", "grep", "-v", "Shewanella", sig47) def test_grep_2_sig_md5(runtmp): # search on substring in md5 - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") - runtmp.run_sourmash('sig', 'grep', 'ce52952152f0', sig47) + runtmp.run_sourmash("sig", "grep", "ce52952152f0", sig47) out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert ss.md5sum() == '09a08691ce52952152f0e866a59f6261' + assert ss.md5sum() == "09a08691ce52952152f0e866a59f6261" def test_grep_2_sig_md5_case_sensitive(runtmp): # case sensitive no match - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('sig', 'grep', 'CE52952152f0', sig47) + runtmp.run_sourmash("sig", "grep", "CE52952152f0", sig47) def test_grep_2_sig_md5_case_insensitive(runtmp): # search on substring in md5, case insensitive - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") - runtmp.run_sourmash('sig', 'grep', '-i', 'CE52952152f0', sig47) + runtmp.run_sourmash("sig", "grep", "-i", "CE52952152f0", sig47) out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert ss.md5sum() == '09a08691ce52952152f0e866a59f6261' + assert ss.md5sum() == "09a08691ce52952152f0e866a59f6261" def test_grep_3_filename(runtmp): # filename match - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") - runtmp.run_sourmash('sig', 'grep', '47.fa', sig47) + runtmp.run_sourmash("sig", "grep", "47.fa", sig47) out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert '47.fa' in ss.filename - assert ss.md5sum() == '09a08691ce52952152f0e866a59f6261' + assert "47.fa" in ss.filename + assert ss.md5sum() == "09a08691ce52952152f0e866a59f6261" def test_grep_3_filename_regexp(runtmp): # search for a regexp on filename - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") - runtmp.run_sourmash('sig', 'grep', '^47.fa', sig47) + runtmp.run_sourmash("sig", "grep", "^47.fa", sig47) out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert '7.fa' in ss.filename - assert ss.md5sum() == '09a08691ce52952152f0e866a59f6261' + assert "7.fa" in ss.filename + assert ss.md5sum() == "09a08691ce52952152f0e866a59f6261" def test_grep_4_no_manifest(runtmp): # fail search when no manifest, by default - sbt = utils.get_test_data('v6.sbt.zip') + sbt = utils.get_test_data("v6.sbt.zip") - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('sig', 'grep', 'e60265', sbt) + with pytest.raises(SourmashCommandFailed): + runtmp.run_sourmash("sig", "grep", "e60265", sbt) print(runtmp.last_result.err) - assert 'ERROR on filename' in runtmp.last_result.err - assert 'sig grep requires a manifest by default, but no manifest present.' in runtmp.last_result.err + assert "ERROR on filename" in runtmp.last_result.err + assert ( + "sig grep requires a manifest by default, but no manifest present." 
+ in runtmp.last_result.err + ) def test_grep_4_no_manifest_ok(runtmp): # generate manifest if --no-require-manifest - sbt = utils.get_test_data('v6.sbt.zip') + sbt = utils.get_test_data("v6.sbt.zip") - runtmp.run_sourmash('sig', 'grep', 'e60265', sbt, '--no-require-manifest') + runtmp.run_sourmash("sig", "grep", "e60265", sbt, "--no-require-manifest") ss = load_signatures(runtmp.last_result.out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'e60265' in ss.md5sum() + assert "e60265" in ss.md5sum() def test_grep_5_zip_include(runtmp): # search zip, include on case sensitive match to name - allzip = utils.get_test_data('prot/all.zip') + allzip = utils.get_test_data("prot/all.zip") - runtmp.run_sourmash('sig', 'grep', '--dna', 'OS223', allzip) + runtmp.run_sourmash("sig", "grep", "--dna", "OS223", allzip) out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella baltica OS223' in ss.name - assert ss.md5sum() == '38729c6374925585db28916b82a6f513' + assert "Shewanella baltica OS223" in ss.name + assert ss.md5sum() == "38729c6374925585db28916b82a6f513" def test_grep_5_zip_include_picklist(runtmp): # search zip, include on case sensitive match to name - allzip = utils.get_test_data('prot/all.zip') + allzip = utils.get_test_data("prot/all.zip") - pickfile = runtmp.output('pick.csv') - with open(pickfile, 'w', newline="") as fp: - w = csv.DictWriter(fp, fieldnames=['md5']) + pickfile = runtmp.output("pick.csv") + with open(pickfile, "w", newline="") as fp: + w = csv.DictWriter(fp, fieldnames=["md5"]) w.writeheader() - w.writerow(dict(md5='09a08691ce52952152f0e866a59f6261')) - w.writerow(dict(md5='38729c6374925585db28916b82a6f513')) + w.writerow(dict(md5="09a08691ce52952152f0e866a59f6261")) + w.writerow(dict(md5="38729c6374925585db28916b82a6f513")) - runtmp.run_sourmash('sig', 'grep', '--dna', 'OS223', allzip, - '--picklist', f"{pickfile}:md5:md5") + runtmp.run_sourmash( + "sig", "grep", "--dna", "OS223", allzip, "--picklist", f"{pickfile}:md5:md5" + ) out = runtmp.last_result.out print(out) err = runtmp.last_result.err print(err) - assert 'for given picklist, found 2 matches to 2 distinct values' in err + assert "for given picklist, found 2 matches to 2 distinct values" in err ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella baltica OS223' in ss.name - assert ss.md5sum() == '38729c6374925585db28916b82a6f513' + assert "Shewanella baltica OS223" in ss.name + assert ss.md5sum() == "38729c6374925585db28916b82a6f513" def test_grep_5_zip_include_case_insensitive(runtmp): # search zip, include on case insensitive match to name - allzip = utils.get_test_data('prot/all.zip') + allzip = utils.get_test_data("prot/all.zip") - runtmp.run_sourmash('sig', 'grep', '--dna', '-i', 'os223', allzip) + runtmp.run_sourmash("sig", "grep", "--dna", "-i", "os223", allzip) out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella baltica OS223' in ss.name - assert ss.md5sum() == '38729c6374925585db28916b82a6f513' + assert "Shewanella baltica OS223" in ss.name + assert ss.md5sum() == "38729c6374925585db28916b82a6f513" def test_grep_5_zip_exclude(runtmp): # search zip, exclude on case-sensitive match - allzip = utils.get_test_data('prot/all.zip') + allzip = utils.get_test_data("prot/all.zip") - runtmp.run_sourmash('sig', 'grep', '--dna', '-v', 'OS185', allzip) + runtmp.run_sourmash("sig", "grep", "--dna", "-v", "OS185", allzip) out = 
runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella baltica OS223' in ss.name - assert ss.md5sum() == '38729c6374925585db28916b82a6f513' + assert "Shewanella baltica OS223" in ss.name + assert ss.md5sum() == "38729c6374925585db28916b82a6f513" def test_grep_5_zip_exclude_case_insensitive(runtmp): # search zip, exclude on case-insensitive match - allzip = utils.get_test_data('prot/all.zip') + allzip = utils.get_test_data("prot/all.zip") - runtmp.run_sourmash('sig', 'grep', '--dna', '-vi', 'os185', allzip) + runtmp.run_sourmash("sig", "grep", "--dna", "-vi", "os185", allzip) out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella baltica OS223' in ss.name - assert ss.md5sum() == '38729c6374925585db28916b82a6f513' + assert "Shewanella baltica OS223" in ss.name + assert ss.md5sum() == "38729c6374925585db28916b82a6f513" def test_grep_6_zip_manifest_csv(runtmp): # do --csv and use result as picklist - allzip = utils.get_test_data('prot/all.zip') + allzip = utils.get_test_data("prot/all.zip") - runtmp.run_sourmash('sig', 'grep', '--dna', 'OS223', allzip, - '--csv', 'match.csv') + runtmp.run_sourmash("sig", "grep", "--dna", "OS223", allzip, "--csv", "match.csv") out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella baltica OS223' in ss.name - assert ss.md5sum() == '38729c6374925585db28916b82a6f513' + assert "Shewanella baltica OS223" in ss.name + assert ss.md5sum() == "38729c6374925585db28916b82a6f513" # now run cat with picklist - runtmp.run_sourmash('sig', 'cat', allzip, - '--picklist', 'match.csv::manifest') + runtmp.run_sourmash("sig", "cat", allzip, "--picklist", "match.csv::manifest") out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella baltica OS223' in ss.name - assert ss.md5sum() == '38729c6374925585db28916b82a6f513' + assert "Shewanella baltica OS223" in ss.name + assert ss.md5sum() == "38729c6374925585db28916b82a6f513" def test_grep_6_zip_manifest_csv_gz(runtmp): # do --csv and use result as picklist - allzip = utils.get_test_data('prot/all.zip') + allzip = utils.get_test_data("prot/all.zip") - runtmp.run_sourmash('sig', 'grep', '--dna', 'OS223', allzip, - '--csv', 'match.csv.gz') + runtmp.run_sourmash( + "sig", "grep", "--dna", "OS223", allzip, "--csv", "match.csv.gz" + ) out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella baltica OS223' in ss.name - assert ss.md5sum() == '38729c6374925585db28916b82a6f513' + assert "Shewanella baltica OS223" in ss.name + assert ss.md5sum() == "38729c6374925585db28916b82a6f513" # check that match.csv.gz is a gzip file - with gzip.open(runtmp.output('match.csv.gz'), 'rt', newline='') as fp: + with gzip.open(runtmp.output("match.csv.gz"), "rt", newline="") as fp: fp.read() # now run cat with picklist - runtmp.run_sourmash('sig', 'cat', allzip, - '--picklist', 'match.csv.gz::manifest') + runtmp.run_sourmash("sig", "cat", allzip, "--picklist", "match.csv.gz::manifest") out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella baltica OS223' in ss.name - assert ss.md5sum() == '38729c6374925585db28916b82a6f513' + assert "Shewanella baltica OS223" in ss.name + assert ss.md5sum() == "38729c6374925585db28916b82a6f513" def test_sig_grep_7_lca(runtmp): # extract 47 from an LCA 
database, with --no-require-manifest - allzip = utils.get_test_data('lca/47+63.lca.json') - sig47 = utils.get_test_data('47.fa.sig') - - runtmp.sourmash('sig', 'grep', "50a9274021e4", allzip, - '--no-require-manifest', '-o', 'matches.sig') - - match = sourmash.load_file_as_signatures(runtmp.output('matches.sig')) + allzip = utils.get_test_data("lca/47+63.lca.json") + sig47 = utils.get_test_data("47.fa.sig") + + runtmp.sourmash( + "sig", + "grep", + "50a9274021e4", + allzip, + "--no-require-manifest", + "-o", + "matches.sig", + ) + + match = sourmash.load_file_as_signatures(runtmp.output("matches.sig")) match = list(match)[0] ss47 = sourmash.load_file_as_signatures(sig47) @@ -324,50 +333,63 @@ def test_sig_grep_7_lca(runtmp): def test_sig_grep_7_picklist_md5_lca_fail(runtmp): # extract 47 from an LCA database, using a picklist w/full md5 => fail - allzip = utils.get_test_data('lca/47+63.lca.json') + allzip = utils.get_test_data("lca/47+63.lca.json") # select on any of these attributes - row = dict(exactName='NC_009665.1 Shewanella baltica OS185, complete genome', - md5full='50a9274021e43eda8b2e77f8fa60ae8e', - md5short='50a9274021e43eda8b2e77f8fa60ae8e'[:8], - fullIdent='NC_009665.1', - nodotIdent='NC_009665') + row = dict( + exactName="NC_009665.1 Shewanella baltica OS185, complete genome", + md5full="50a9274021e43eda8b2e77f8fa60ae8e", + md5short="50a9274021e43eda8b2e77f8fa60ae8e"[:8], + fullIdent="NC_009665.1", + nodotIdent="NC_009665", + ) # make picklist - picklist_csv = runtmp.output('pick.csv') - with open(picklist_csv, 'w', newline='') as csvfp: + picklist_csv = runtmp.output("pick.csv") + with open(picklist_csv, "w", newline="") as csvfp: w = csv.DictWriter(csvfp, fieldnames=row.keys()) w.writeheader() w.writerow(row) picklist_arg = f"{picklist_csv}:md5full:md5" - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('sig', 'grep', '50a92740', allzip, - '--picklist', picklist_arg, - '--no-require-manifest') + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash( + "sig", + "grep", + "50a92740", + allzip, + "--picklist", + picklist_arg, + "--no-require-manifest", + ) # this happens b/c the implementation of 'grep' uses picklists, and # LCA databases don't support multiple picklists. print(runtmp.last_result.err) - assert "This input collection doesn't support 'grep' with picklists." in runtmp.last_result.err + assert ( + "This input collection doesn't support 'grep' with picklists." 
+ in runtmp.last_result.err + ) def test_sig_grep_8_count(runtmp): - zips = ['prot/all.zip', - 'prot/dayhoff.sbt.zip', - 'prot/dayhoff.zip', - 'prot/hp.sbt.zip', - 'prot/hp.zip', - 'prot/protein.sbt.zip', - 'prot/protein.zip'] - - zip_src = [ utils.get_test_data(x) for x in zips ] - - os.mkdir(runtmp.output('prot')) + zips = [ + "prot/all.zip", + "prot/dayhoff.sbt.zip", + "prot/dayhoff.zip", + "prot/hp.sbt.zip", + "prot/hp.zip", + "prot/protein.sbt.zip", + "prot/protein.zip", + ] + + zip_src = [utils.get_test_data(x) for x in zips] + + os.mkdir(runtmp.output("prot")) for src, dest in zip(zip_src, zips): shutil.copyfile(src, runtmp.output(dest)) - - runtmp.sourmash('sig', 'grep', '-c', '0015939', *zips) + + runtmp.sourmash("sig", "grep", "-c", "0015939", *zips) out = runtmp.last_result.out err = runtmp.last_result.err @@ -391,23 +413,23 @@ def test_sig_grep_8_count(runtmp): def test_sig_grep_identical_md5s(runtmp): # test that we properly handle different signatures with identical md5s - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss = load_signatures(sig47) sig = list(ss)[0] new_sig = sig.to_mutable() - new_sig.name = 'foo' - sig47foo = runtmp.output('foo.sig') + new_sig.name = "foo" + sig47foo = runtmp.output("foo.sig") # this was only a problem when the signatures are stored in the same file - with open(sig47foo, 'wt') as fp: + with open(sig47foo, "w") as fp: sourmash.save_signatures([new_sig, sig], fp) - runtmp.run_sourmash('sig', 'grep', '-i', 'foo', sig47foo) + runtmp.run_sourmash("sig", "grep", "-i", "foo", sig47foo) out = runtmp.last_result.out ss = load_signatures(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] - assert 'Shewanella' not in ss.name - assert 'foo' in ss.name - assert ss.md5sum() == '09a08691ce52952152f0e866a59f6261' + assert "Shewanella" not in ss.name + assert "foo" in ss.name + assert ss.md5sum() == "09a08691ce52952152f0e866a59f6261" diff --git a/tests/test_compare.py b/tests/test_compare.py index bc25e98e3c..9821295cac 100644 --- a/tests/test_compare.py +++ b/tests/test_compare.py @@ -5,9 +5,14 @@ import pytest import sourmash -from sourmash.compare import (compare_all_pairs, compare_parallel, - compare_serial, compare_serial_containment, - compare_serial_max_containment, compare_serial_avg_containment) +from sourmash.compare import ( + compare_all_pairs, + compare_parallel, + compare_serial, + compare_serial_containment, + compare_serial_max_containment, + compare_serial_avg_containment, +) import sourmash_tst_utils as utils @@ -44,66 +49,90 @@ def test_compare_serial(siglist, ignore_abundance): similarities = compare_serial(siglist, ignore_abundance, downsample=False) true_similarities = np.array( - [[1., 0.356, 0.078, 0.086, 0., 0., 0.], - [0.356, 1., 0.072, 0.078, 0., 0., 0.], - [0.078, 0.072, 1., 0.074, 0., 0., 0.], - [0.086, 0.078, 0.074, 1., 0., 0., 0.], - [0., 0., 0., 0., 1., 0.382, 0.364], - [0., 0., 0., 0., 0.382, 1., 0.386], - [0., 0., 0., 0., 0.364, 0.386, 1.]]) + [ + [1.0, 0.356, 0.078, 0.086, 0.0, 0.0, 0.0], + [0.356, 1.0, 0.072, 0.078, 0.0, 0.0, 0.0], + [0.078, 0.072, 1.0, 0.074, 0.0, 0.0, 0.0], + [0.086, 0.078, 0.074, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 1.0, 0.382, 0.364], + [0.0, 0.0, 0.0, 0.0, 0.382, 1.0, 0.386], + [0.0, 0.0, 0.0, 0.0, 0.364, 0.386, 1.0], + ] + ) np.testing.assert_array_equal(similarities, true_similarities) def test_compare_parallel(siglist, ignore_abundance): - similarities = compare_parallel(siglist, ignore_abundance, downsample=False, n_jobs=2) + similarities = 
compare_parallel( + siglist, ignore_abundance, downsample=False, n_jobs=2 + ) true_similarities = np.array( - [[1., 0.356, 0.078, 0.086, 0., 0., 0.], - [0.356, 1., 0.072, 0.078, 0., 0., 0.], - [0.078, 0.072, 1., 0.074, 0., 0., 0.], - [0.086, 0.078, 0.074, 1., 0., 0., 0.], - [0., 0., 0., 0., 1., 0.382, 0.364], - [0., 0., 0., 0., 0.382, 1., 0.386], - [0., 0., 0., 0., 0.364, 0.386, 1.]]) + [ + [1.0, 0.356, 0.078, 0.086, 0.0, 0.0, 0.0], + [0.356, 1.0, 0.072, 0.078, 0.0, 0.0, 0.0], + [0.078, 0.072, 1.0, 0.074, 0.0, 0.0, 0.0], + [0.086, 0.078, 0.074, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 1.0, 0.382, 0.364], + [0.0, 0.0, 0.0, 0.0, 0.382, 1.0, 0.386], + [0.0, 0.0, 0.0, 0.0, 0.364, 0.386, 1.0], + ] + ) np.testing.assert_array_equal(similarities, true_similarities) def test_compare_all_pairs(siglist, ignore_abundance): - similarities_parallel = compare_all_pairs(siglist, ignore_abundance, downsample=False, n_jobs=2) + similarities_parallel = compare_all_pairs( + siglist, ignore_abundance, downsample=False, n_jobs=2 + ) similarities_serial = compare_serial(siglist, ignore_abundance, downsample=False) np.testing.assert_array_equal(similarities_parallel, similarities_serial) def test_compare_serial_jaccardANI(scaled_siglist, ignore_abundance): - jANI = compare_serial(scaled_siglist, ignore_abundance, downsample=False, return_ani=True) + jANI = compare_serial( + scaled_siglist, ignore_abundance, downsample=False, return_ani=True + ) print(jANI) - + true_jaccard_ANI = np.array( - [[1., 0.978, 0., 0.], - [0.978, 1., 0.96973012, 0.99262776], - [0., 0.96973012, 1., 0.97697011], - [0., 0.99262776, 0.97697011, 1.]]) + [ + [1.0, 0.978, 0.0, 0.0], + [0.978, 1.0, 0.96973012, 0.99262776], + [0.0, 0.96973012, 1.0, 0.97697011], + [0.0, 0.99262776, 0.97697011, 1.0], + ] + ) np.testing.assert_array_almost_equal(jANI, true_jaccard_ANI, decimal=3) def test_compare_parallel_jaccardANI(scaled_siglist, ignore_abundance): - jANI = compare_parallel(scaled_siglist, ignore_abundance, downsample=False, n_jobs=2, return_ani=True) + jANI = compare_parallel( + scaled_siglist, ignore_abundance, downsample=False, n_jobs=2, return_ani=True + ) true_jaccard_ANI = np.array( - [[1., 0.978, 0., 0.], - [0.978, 1., 0.96973012, 0.99262776], - [0., 0.96973012, 1., 0.97697011], - [0., 0.99262776, 0.97697011, 1.]]) + [ + [1.0, 0.978, 0.0, 0.0], + [0.978, 1.0, 0.96973012, 0.99262776], + [0.0, 0.96973012, 1.0, 0.97697011], + [0.0, 0.99262776, 0.97697011, 1.0], + ] + ) np.testing.assert_array_almost_equal(jANI, true_jaccard_ANI, decimal=3) def test_compare_all_pairs_jaccardANI(scaled_siglist, ignore_abundance): - similarities_parallel = compare_all_pairs(scaled_siglist, ignore_abundance, downsample=False, n_jobs=2, return_ani=True) - similarities_serial = compare_serial(scaled_siglist, ignore_abundance, downsample=False, return_ani=True) + similarities_parallel = compare_all_pairs( + scaled_siglist, ignore_abundance, downsample=False, n_jobs=2, return_ani=True + ) + similarities_serial = compare_serial( + scaled_siglist, ignore_abundance, downsample=False, return_ani=True + ) np.testing.assert_array_equal(similarities_parallel, similarities_serial) @@ -112,39 +141,56 @@ def test_compare_serial_containmentANI(scaled_siglist): print(containment_ANI) true_containment_ANI = np.array( - [[1, 0.966, 0., 0.], - [1, 1., 0.97715525, 1.], - [0., 0.96377054, 1., 0.97678608], - [0., 0.98667513, 0.97715525, 1.]]) + [ + [1, 0.966, 0.0, 0.0], + [1, 1.0, 0.97715525, 1.0], + [0.0, 0.96377054, 1.0, 0.97678608], + [0.0, 0.98667513, 0.97715525, 1.0], + ] + ) 
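    # unlike the symmetric Jaccard matrices above, this containment-ANI matrix
    # is asymmetric because containment is directional (containment of A in B
    # need not equal containment of B in A); the max- and avg-containment
    # tests below recover a symmetric matrix.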
- np.testing.assert_array_almost_equal(containment_ANI, true_containment_ANI, decimal=3) + np.testing.assert_array_almost_equal( + containment_ANI, true_containment_ANI, decimal=3 + ) def test_compare_serial_maxcontainmentANI(scaled_siglist): - # check max_containment ANI - max_containment_ANI = compare_serial_max_containment(scaled_siglist, return_ani=True) + max_containment_ANI = compare_serial_max_containment( + scaled_siglist, return_ani=True + ) print(max_containment_ANI) true_max_containment_ANI = np.array( - [[1., 1., 0., 0.], - [1., 1., 0.97715525, 1.], - [0., 0.97715525, 1., 0.97715525], - [0., 1., 0.97715525, 1.]]) + [ + [1.0, 1.0, 0.0, 0.0], + [1.0, 1.0, 0.97715525, 1.0], + [0.0, 0.97715525, 1.0, 0.97715525], + [0.0, 1.0, 0.97715525, 1.0], + ] + ) - np.testing.assert_array_almost_equal(max_containment_ANI, true_max_containment_ANI, decimal=3) + np.testing.assert_array_almost_equal( + max_containment_ANI, true_max_containment_ANI, decimal=3 + ) def test_compare_serial_avg_containmentANI(scaled_siglist): - # check avg_containment ANI - avg_containment_ANI = compare_serial_avg_containment(scaled_siglist, return_ani=True) + avg_containment_ANI = compare_serial_avg_containment( + scaled_siglist, return_ani=True + ) print(avg_containment_ANI) true_avg_containment_ANI = np.array( - [[1., 0.983, 0., 0.], - [0.983, 1., 0.97046289, 0.99333757], - [0., 0.97046289, 1., 0.97697067], - [0., 0.99333757, 0.97697067, 1.]]) - - np.testing.assert_array_almost_equal(avg_containment_ANI, true_avg_containment_ANI, decimal=3) + [ + [1.0, 0.983, 0.0, 0.0], + [0.983, 1.0, 0.97046289, 0.99333757], + [0.0, 0.97046289, 1.0, 0.97697067], + [0.0, 0.99333757, 0.97697067, 1.0], + ] + ) + + np.testing.assert_array_almost_equal( + avg_containment_ANI, true_avg_containment_ANI, decimal=3 + ) diff --git a/tests/test_deprecated.py b/tests/test_deprecated.py index fdd9acc53c..34097dd695 100644 --- a/tests/test_deprecated.py +++ b/tests/test_deprecated.py @@ -1,13 +1,14 @@ from sourmash import signature import sourmash_tst_utils as utils + def test_load_textmode(track_abundance): # ijson required a file in binary mode or bytes, # but we had an API example in the docs using 'rt'. 
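# A hedged sketch of the loading call this test exercises: load_signatures()
# accepts an open file handle (text or binary mode both work, per the test)
# and yields SourmashSignature objects. The path below is illustrative.
from sourmash import signature

with open("genome-s10+s11.sig") as fp:
    siglist = list(signature.load_signatures(fp))
print(siglist[0].name)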
# I fixed the docs, but I'm keeping this test here # to make sure we still support it =/ - sigfile = utils.get_test_data('genome-s10+s11.sig') - with open(sigfile, 'rt') as sigfp: + sigfile = utils.get_test_data("genome-s10+s11.sig") + with open(sigfile) as sigfp: siglist = list(signature.load_signatures(sigfp)) loaded_sig = siglist[0] - assert loaded_sig.name == 'genome-s10+s11' + assert loaded_sig.name == "genome-s10+s11" diff --git a/tests/test_distance_utils.py b/tests/test_distance_utils.py index 22067dcc68..6b44064a9e 100644 --- a/tests/test_distance_utils.py +++ b/tests/test_distance_utils.py @@ -3,23 +3,33 @@ """ import pytest import numpy as np -from sourmash.distance_utils import (containment_to_distance, get_exp_probability_nothing_common, - handle_seqlen_nkmers, jaccard_to_distance, - ANIResult, ciANIResult, jaccardANIResult, var_n_mutated, - set_size_chernoff, set_size_exact_prob) +from sourmash.distance_utils import ( + containment_to_distance, + get_exp_probability_nothing_common, + handle_seqlen_nkmers, + jaccard_to_distance, + ANIResult, + ciANIResult, + jaccardANIResult, + var_n_mutated, + set_size_chernoff, + set_size_exact_prob, +) + def test_aniresult(): res = ANIResult(0.4, 0.1) assert res.dist == 0.4 assert res.ani == 0.6 assert res.p_nothing_in_common == 0.1 - assert res.p_exceeds_threshold ==True + assert res.p_exceeds_threshold == True # check that they're equivalent res2 = ANIResult(0.4, 0.1) assert res == res2 res3 = ANIResult(0.5, 0) assert res != res3 - assert res3.p_exceeds_threshold ==False + assert res3.p_exceeds_threshold == False + def test_aniresult_bad_distance(): """ @@ -38,18 +48,18 @@ def test_aniresult_bad_distance(): def test_jaccard_aniresult(): res = jaccardANIResult(0.4, 0.1, jaccard_error=0.03) assert res.dist == 0.4 - assert res.ani == None + assert res.ani is None assert res.p_nothing_in_common == 0.1 assert res.jaccard_error == 0.03 - assert res.p_exceeds_threshold ==True - assert res.je_exceeds_threshold ==True + assert res.p_exceeds_threshold == True + assert res.je_exceeds_threshold == True res3 = jaccardANIResult(0.4, 0.1, jaccard_error=0.03, je_threshold=0.1) - assert res3.je_exceeds_threshold ==False + assert res3.je_exceeds_threshold == False assert res3.ani == 0.6 def test_jaccard_aniresult_nojaccarderror(): - #jaccard error is None + # jaccard error is None with pytest.raises(Exception) as exc: jaccardANIResult(0.4, 0.1, None) print("\n", str(exc.value)) @@ -57,14 +67,14 @@ def test_jaccard_aniresult_nojaccarderror(): def test_ci_aniresult(): - res = ciANIResult(0.4, 0.1, dist_low=0.3,dist_high=0.5) + res = ciANIResult(0.4, 0.1, dist_low=0.3, dist_high=0.5) print(res) assert res.dist == 0.4 assert res.ani == 0.6 assert res.p_nothing_in_common == 0.1 assert res.ani_low == 0.5 assert res.ani_high == 0.7 - res2 = ciANIResult(0.4, 0.1, dist_low=0.3,dist_high=0.5) + res2 = ciANIResult(0.4, 0.1, dist_low=0.3, dist_high=0.5) assert res == res2 res3 = ciANIResult(0.4, 0.2, dist_low=0.3, dist_high=0.5) assert res != res3 @@ -74,12 +84,14 @@ def test_containment_to_distance_zero(): contain = 0 scaled = 1 nkmers = 10000 - ksize=21 - res = containment_to_distance(contain,ksize,scaled, n_unique_kmers=nkmers, estimate_ci=True) + ksize = 21 + res = containment_to_distance( + contain, ksize, scaled, n_unique_kmers=nkmers, estimate_ci=True + ) print(res) # check results - exp_dist,exp_low,exp_high,pnc = 1.0,1.0,1.0,1.0 - exp_id, exp_idlow,exp_idhigh,pnc = 0.0,0.0,0.0,1.0 + exp_dist, exp_low, exp_high, pnc = 1.0, 1.0, 1.0, 1.0 + exp_id, exp_idlow, 
exp_idhigh, pnc = 0.0, 0.0, 0.0, 1.0 assert res.dist == exp_dist assert res.dist_low == exp_low assert res.dist_high == exp_high @@ -88,9 +100,15 @@ def test_containment_to_distance_zero(): assert res.ani_low == exp_idlow assert res.ani_high == exp_idhigh # check without returning ci - res2 = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers) + res2 = containment_to_distance(contain, ksize, scaled, n_unique_kmers=nkmers) print(res2) - exp_res = ciANIResult(dist=1.0, dist_low=1.0, dist_high=1.0, p_nothing_in_common=1.0, p_threshold=0.001) + exp_res = ciANIResult( + dist=1.0, + dist_low=1.0, + dist_high=1.0, + p_nothing_in_common=1.0, + p_threshold=0.001, + ) assert res2 == exp_res @@ -98,11 +116,13 @@ def test_containment_to_distance_one(): contain = 1 scaled = 1 nkmers = 10000 - ksize=21 - res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,estimate_ci=True) + ksize = 21 + res = containment_to_distance( + contain, ksize, scaled, n_unique_kmers=nkmers, estimate_ci=True + ) print(res) - exp_dist, exp_low,exp_high,pnc = 0.0,0.0,0.0,0.0 - exp_id, exp_idlow,exp_idhigh,pnc = 1.0,1.0,1.0,0.0 + exp_dist, exp_low, exp_high, pnc = 0.0, 0.0, 0.0, 0.0 + exp_id, exp_idlow, exp_idhigh, pnc = 1.0, 1.0, 1.0, 0.0 assert res.dist == exp_dist assert res.dist_low == exp_low assert res.dist_high == exp_high @@ -112,7 +132,7 @@ def test_containment_to_distance_one(): assert res.ani_high == exp_idhigh # check without returning ci - res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers) + res = containment_to_distance(contain, ksize, scaled, n_unique_kmers=nkmers) assert res.dist == exp_dist assert res.ani == exp_id assert res.p_nothing_in_common == pnc @@ -124,8 +144,10 @@ def test_containment_to_distance_scaled1(): contain = 0.5 scaled = 1 nkmers = 10000 - ksize=21 - res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,estimate_ci=True) + ksize = 21 + res = containment_to_distance( + contain, ksize, scaled, n_unique_kmers=nkmers, estimate_ci=True + ) print(res) # check results assert res.dist == 0.032468221476108394 @@ -136,17 +158,27 @@ def test_containment_to_distance_scaled1(): assert res.ani_low == 0.9635213980271021 assert res.p_nothing_in_common == 0.0 # without returning ci - res2 = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers) - assert (res2.dist,res2.ani,res2.p_nothing_in_common) == (0.032468221476108394, 0.9675317785238916, 0.0) - assert (res2.dist,res2.ani,res2.p_nothing_in_common) == (res.dist, res.ani, res.p_nothing_in_common) + res2 = containment_to_distance(contain, ksize, scaled, n_unique_kmers=nkmers) + assert (res2.dist, res2.ani, res2.p_nothing_in_common) == ( + 0.032468221476108394, + 0.9675317785238916, + 0.0, + ) + assert (res2.dist, res2.ani, res2.p_nothing_in_common) == ( + res.dist, + res.ani, + res.p_nothing_in_common, + ) def test_containment_to_distance_scaled100(): contain = 0.1 scaled = 100 nkmers = 10000 - ksize=31 - res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,estimate_ci=True) + ksize = 31 + res = containment_to_distance( + contain, ksize, scaled, n_unique_kmers=nkmers, estimate_ci=True + ) print(res) # check results assert res.dist == 0.07158545548052564 @@ -160,8 +192,10 @@ def test_containment_to_distance_scaled100_2(): contain = 0.5 scaled = 100 nkmers = 10000 - ksize=21 - res= containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,estimate_ci=True) + ksize = 21 + res = containment_to_distance( + contain, ksize, scaled, 
n_unique_kmers=nkmers, estimate_ci=True + ) print(res) # check results assert res.dist == 0.032468221476108394 @@ -174,8 +208,10 @@ def test_containment_to_distance_k10(): contain = 0.5 scaled = 100 nkmers = 10000 - ksize=10 - res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,estimate_ci=True) + ksize = 10 + res = containment_to_distance( + contain, ksize, scaled, n_unique_kmers=nkmers, estimate_ci=True + ) print(res) # check results assert res.dist == 0.06696700846319259 @@ -188,17 +224,31 @@ def test_containment_to_distance_confidence(): contain = 0.1 scaled = 100 nkmers = 10000 - ksize=31 - confidence=0.99 - res = containment_to_distance(contain,ksize,scaled,confidence=confidence,n_unique_kmers=nkmers, estimate_ci=True) + ksize = 31 + confidence = 0.99 + res = containment_to_distance( + contain, + ksize, + scaled, + confidence=confidence, + n_unique_kmers=nkmers, + estimate_ci=True, + ) print(res) # check results assert res.dist == 0.07158545548052564 assert res.dist_low == 0.04802880300938562 assert res.dist_high == 0.09619930040790341 assert res.p_exceeds_threshold == False - confidence=0.90 - res2 = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers,confidence=confidence, estimate_ci=True) + confidence = 0.90 + res2 = containment_to_distance( + contain, + ksize, + scaled, + n_unique_kmers=nkmers, + confidence=confidence, + estimate_ci=True, + ) print(res2) # check results assert res2.dist == res.dist @@ -211,16 +261,30 @@ def test_nkmers_to_bp_containment(): containment = 0.1 scaled = 100 bp_len = 10030 - ksize=31 - nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp= bp_len) + ksize = 31 + nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp=bp_len) print("nkmers_from_bp:", nkmers) - confidence=0.99 - kmer_res = containment_to_distance(containment,ksize,scaled,confidence=confidence,n_unique_kmers=nkmers,estimate_ci=True) - bp_res = containment_to_distance(containment,ksize,scaled,confidence=confidence,sequence_len_bp=bp_len,estimate_ci=True) + confidence = 0.99 + kmer_res = containment_to_distance( + containment, + ksize, + scaled, + confidence=confidence, + n_unique_kmers=nkmers, + estimate_ci=True, + ) + bp_res = containment_to_distance( + containment, + ksize, + scaled, + confidence=confidence, + sequence_len_bp=bp_len, + estimate_ci=True, + ) print(f"\nkDIST: {kmer_res}") print(f"\nbpDIST:,{bp_res}") # check results - assert kmer_res==bp_res + assert kmer_res == bp_res assert kmer_res.dist == 0.07158545548052564 assert kmer_res.dist_low == 0.04802880300938562 assert kmer_res.dist_high == 0.09619930040790341 @@ -230,8 +294,8 @@ def test_jaccard_to_distance_zero(): jaccard = 0 scaled = 1 nkmers = 10000 - ksize=21 - res= jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers) + ksize = 21 + res = jaccard_to_distance(jaccard, ksize, scaled, n_unique_kmers=nkmers) print(res) # check results assert res.dist == 1.0 @@ -244,8 +308,8 @@ def test_jaccard_to_distance_one(): jaccard = 1 scaled = 1 nkmers = 10000 - ksize=21 - res= jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers) + ksize = 21 + res = jaccard_to_distance(jaccard, ksize, scaled, n_unique_kmers=nkmers) print(res) # check results assert res.dist == 0.0 @@ -259,36 +323,38 @@ def test_jaccard_to_distance_scaled(): jaccard = 0.5 scaled = 1 nkmers = 10000 - ksize=21 - res = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers) + ksize = 21 + res = jaccard_to_distance(jaccard, ksize, scaled, n_unique_kmers=nkmers) print(res) # check results assert round(res.dist, 
3) == round(0.019122659390482077, 3) - assert res.ani == None + assert res.ani is None assert res.p_exceeds_threshold == False assert res.jaccard_error == 0.00018351337045518042 - assert res.je_exceeds_threshold ==True + assert res.je_exceeds_threshold == True scaled = 100 - res2 = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers) + res2 = jaccard_to_distance(jaccard, ksize, scaled, n_unique_kmers=nkmers) print(res2) assert res2.dist == res.dist assert res2.jaccard_error == res.jaccard_error assert res2.p_nothing_in_common != res.p_nothing_in_common - assert res2.p_exceeds_threshold ==False + assert res2.p_exceeds_threshold == False def test_jaccard_to_distance_k31(): jaccard = 0.5 scaled = 100 nkmers = 10000 - ksize=31 - res = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers) + ksize = 31 + res = jaccard_to_distance(jaccard, ksize, scaled, n_unique_kmers=nkmers) print(res) # check results - assert res.je_exceeds_threshold ==True - assert res.ani == None + assert res.je_exceeds_threshold == True + assert res.ani is None assert res.p_exceeds_threshold == False - res2 = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers, err_threshold=0.1) + res2 = jaccard_to_distance( + jaccard, ksize, scaled, n_unique_kmers=nkmers, err_threshold=0.1 + ) assert res2.je_exceeds_threshold == False assert res2.ani == 0.9870056455892898 @@ -297,8 +363,8 @@ def test_jaccard_to_distance_k31_2(): jaccard = 0.1 scaled = 100 nkmers = 10000 - ksize=31 - res = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers) + ksize = 31 + res = jaccard_to_distance(jaccard, ksize, scaled, n_unique_kmers=nkmers) print(res) # check results assert res.ani == 0.9464928391768298 @@ -310,11 +376,11 @@ def test_nkmers_to_bp_jaccard(): jaccard = 0.1 scaled = 100 bp_len = 10030 - ksize=31 - nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp= bp_len) + ksize = 31 + nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp=bp_len) print("nkmers_from_bp:", nkmers) - kmer_res = jaccard_to_distance(jaccard,ksize,scaled,n_unique_kmers=nkmers) - bp_res = jaccard_to_distance(jaccard,ksize,scaled,sequence_len_bp=bp_len) + kmer_res = jaccard_to_distance(jaccard, ksize, scaled, n_unique_kmers=nkmers) + bp_res = jaccard_to_distance(jaccard, ksize, scaled, sequence_len_bp=bp_len) print(f"\nkmer_res: {kmer_res}") print(f"\nbp_res: {bp_res}") # check results @@ -329,12 +395,16 @@ def test_exp_prob_nothing_common(): ksize = 31 scaled = 10 bp_len = 1000030 - nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp= bp_len) + nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp=bp_len) print("nkmers_from_bp:", nkmers) - nkmers_pnc = get_exp_probability_nothing_common(dist,ksize,scaled,n_unique_kmers=nkmers) + nkmers_pnc = get_exp_probability_nothing_common( + dist, ksize, scaled, n_unique_kmers=nkmers + ) print(f"prob nothing in common: {nkmers_pnc}") - bp_pnc = get_exp_probability_nothing_common(dist,ksize,scaled,sequence_len_bp=bp_len) + bp_pnc = get_exp_probability_nothing_common( + dist, ksize, scaled, sequence_len_bp=bp_len + ) assert nkmers_pnc == bp_pnc == 7.437016945722123e-07 @@ -347,15 +417,17 @@ def test_containment_to_distance_tinytestdata_var0(): contain = 0.9 scaled = 1 nkmers = 4 - ksize=31 - res = containment_to_distance(contain,ksize,scaled,n_unique_kmers=nkmers, estimate_ci=True) + ksize = 31 + res = containment_to_distance( + contain, ksize, scaled, n_unique_kmers=nkmers, estimate_ci=True + ) print(res) # check results assert res.dist == 0.003392957179023992 - assert res.dist_low == None - 
assert res.dist_high == None - assert res.ani_low == None - assert res.ani_high == None + assert res.dist_low is None + assert res.dist_high is None + assert res.ani_low is None + assert res.ani_high is None assert res.p_exceeds_threshold == False @@ -364,7 +436,7 @@ def test_var_n_mutated(): r = 0 ksize = 31 nkmers = 200 - var_n_mut = var_n_mutated(nkmers,ksize,r) + var_n_mut = var_n_mutated(nkmers, ksize, r) print(f"var_n_mutated: {var_n_mut}") assert var_n_mut == 0 # check var 0.0 valuerror @@ -372,51 +444,71 @@ def test_var_n_mutated(): ksize = 31 nkmers = 200 with pytest.raises(ValueError) as exc: - var_n_mut = var_n_mutated(nkmers,ksize,r) + var_n_mut = var_n_mutated(nkmers, ksize, r) assert "Error: varN <0.0!" in str(exc) # check successful r = 0.4 ksize = 31 nkmers = 200000 - var_n_mut = var_n_mutated(nkmers,ksize,r) + var_n_mut = var_n_mutated(nkmers, ksize, r) print(f"var_n_mutated: {var_n_mut}") assert var_n_mut == 0.10611425440741508 def test_handle_seqlen_nkmers(): bp_len = 10030 - ksize=31 + ksize = 31 # convert seqlen to nkmers - nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp= bp_len) + nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp=bp_len) assert nkmers == 10000 # if nkmers is provided, just use that - nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp= bp_len, n_unique_kmers= bp_len) + nkmers = handle_seqlen_nkmers(ksize, sequence_len_bp=bp_len, n_unique_kmers=bp_len) assert nkmers == 10030 # if neither seqlen or nkmers provided, complain with pytest.raises(ValueError) as exc: nkmers = handle_seqlen_nkmers(ksize) - assert("Error: distance estimation requires input of either 'sequence_len_bp' or 'n_unique_kmers'") in str(exc) + assert ( + "Error: distance estimation requires input of either 'sequence_len_bp' or 'n_unique_kmers'" + ) in str(exc) def test_set_size_chernoff(): - eps = 10**(-6) + eps = 10 ** (-6) rel_error = 0.01 set_size = 1000000 - s = 1/0.1 # I'm used to using a scale value between 0 and 1 + s = 1 / 0.1 # I'm used to using a scale value between 0 and 1 value_from_mathematica = 0.928652 - assert np.abs(set_size_chernoff(set_size, s, relative_error=rel_error) - value_from_mathematica) < eps + assert ( + np.abs( + set_size_chernoff(set_size, s, relative_error=rel_error) + - value_from_mathematica + ) + < eps + ) rel_error = 0.05 set_size = 10000 s = 1 value_from_mathematica = 0.999519 - assert np.abs(set_size_chernoff(set_size, s, relative_error=rel_error) - value_from_mathematica) < eps + assert ( + np.abs( + set_size_chernoff(set_size, s, relative_error=rel_error) + - value_from_mathematica + ) + < eps + ) rel_error = 0.001 set_size = 10 - s = 1/.01 + s = 1 / 0.01 value_from_mathematica = -1 - assert np.abs(set_size_chernoff(set_size, s, relative_error=rel_error) - value_from_mathematica) < eps + assert ( + np.abs( + set_size_chernoff(set_size, s, relative_error=rel_error) + - value_from_mathematica + ) + < eps + ) def test_set_size_exact_prob(): diff --git a/tests/test_hll.py b/tests/test_hll.py index da8d3aad68..d49336bf7a 100644 --- a/tests/test_hll.py +++ b/tests/test_hll.py @@ -11,7 +11,7 @@ K = 21 # size of kmer ERR_RATE = 0.01 N_UNIQUE = 3356 -TRANSLATE = {'A': 'T', 'C': 'G', 'T': 'A', 'G': 'C'} +TRANSLATE = {"A": "T", "C": "G", "T": "A", "G": "C"} def test_hll_add_python(): @@ -19,16 +19,16 @@ def test_hll_add_python(): # use the lower level add() method, which accepts anything, # and compare to an exact count using collections.Counter - filename = utils.get_test_data('ecoli.genes.fna') + filename = utils.get_test_data("ecoli.genes.fna") 
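# A condensed sketch of the distance_utils API covered by the tests above;
# the inputs (containment/jaccard, ksize, scaled, n_unique_kmers) mirror the
# test values, and sequence_len_bp may be passed instead of n_unique_kmers.
from sourmash.distance_utils import containment_to_distance, jaccard_to_distance

res = containment_to_distance(0.1, 31, 100, n_unique_kmers=10000, estimate_ci=True)
print(res.dist, res.ani, res.dist_low, res.dist_high)

jres = jaccard_to_distance(0.1, 31, 100, n_unique_kmers=10000)
print(jres.dist, jres.ani, jres.p_exceeds_threshold)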
hll = HLL(ERR_RATE, K) counter = set() with open(filename) as f: for n, record in enumerate(fasta_iter(f)): - sequence = record['sequence'] + sequence = record["sequence"] seq_len = len(sequence) for n in range(0, seq_len + 1 - K): - kmer = sequence[n:n + K] + kmer = sequence[n : n + K] rc = "".join(TRANSLATE[c] for c in kmer[::-1]) hll.add(kmer) @@ -47,12 +47,12 @@ def test_hll_consume_string(): # test rust code to count unique kmers using HyperLogLog, # using screed to feed each read to the counter. - filename = utils.get_test_data('ecoli.genes.fna') + filename = utils.get_test_data("ecoli.genes.fna") hll = HLL(ERR_RATE, K) - n_consumed = n = 0 + n = 0 with open(filename) as f: for n, record in enumerate(fasta_iter(f), 1): - hll.add_sequence(record['sequence']) + hll.add_sequence(record["sequence"]) assert abs(1 - float(len(hll)) / N_UNIQUE) < ERR_RATE @@ -60,10 +60,9 @@ def test_hll_consume_string(): def test_hll_similarity_containment(): N_UNIQUE_H1 = 500741 N_UNIQUE_H2 = 995845 - N_UNIQUE_U = 995845 SIMILARITY = 0.502783 - CONTAINMENT_H1 = 1. + CONTAINMENT_H1 = 1.0 CONTAINMENT_H2 = 0.502783 INTERSECTION = 500838 @@ -72,23 +71,23 @@ def test_hll_similarity_containment(): hll2 = HLL(ERR_RATE, K) hllu = HLL(ERR_RATE, K) - filename = utils.get_test_data('genome-s10.fa.gz') + filename = utils.get_test_data("genome-s10.fa.gz") with gzip.GzipFile(filename) as f: for n, record in enumerate(fasta_iter(f)): - sequence = record['sequence'] + sequence = record["sequence"] seq_len = len(sequence) for n in range(0, seq_len + 1 - K): - kmer = sequence[n:n + K] + kmer = sequence[n : n + K] hll1.add(kmer) hllu.add(kmer) - filename = utils.get_test_data('genome-s10+s11.fa.gz') + filename = utils.get_test_data("genome-s10+s11.fa.gz") with gzip.GzipFile(filename) as f: for n, record in enumerate(fasta_iter(f)): - sequence = record['sequence'] + sequence = record["sequence"] seq_len = len(sequence) for n in range(0, seq_len + 1 - K): - kmer = sequence[n:n + K] + kmer = sequence[n : n + K] hll2.add(kmer) hllu.add(kmer) @@ -113,13 +112,14 @@ def test_hll_similarity_containment(): assert abs(1 - float(hll1.intersection(hllu)) / N_UNIQUE_U) < ERR_RATE """ + def test_hll_save_load(): - filename = utils.get_test_data('ecoli.genes.fna') + filename = utils.get_test_data("ecoli.genes.fna") hll = HLL(ERR_RATE, K) - n_consumed = n = 0 + n = 0 with open(filename) as f: for n, record in enumerate(fasta_iter(f), 1): - hll.add_sequence(record['sequence']) + hll.add_sequence(record["sequence"]) assert abs(1 - float(len(hll)) / N_UNIQUE) < ERR_RATE diff --git a/tests/test_index.py b/tests/test_index.py index af0c1da890..b207376443 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -9,10 +9,15 @@ import sourmash from sourmash import load_one_signature, SourmashSignature -from sourmash.index import (LinearIndex, ZipFileLinearIndex, - make_jaccard_search_query, CounterGather, - LazyLinearIndex, MultiIndex, - StandaloneManifestIndex) +from sourmash.index import ( + LinearIndex, + ZipFileLinearIndex, + make_jaccard_search_query, + CounterGather, + LazyLinearIndex, + MultiIndex, + StandaloneManifestIndex, +) from sourmash.index.revindex import RevIndex from sourmash.sbt import SBT, GraphFactory from sourmash import sourmash_args @@ -90,7 +95,7 @@ def test_simple_index(n_children): def test_linear_index_prefetch_empty(): # check that an exception is raised upon for an empty LinearIndex - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) 
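# A minimal sketch of the HyperLogLog wrapper from test_hll.py above; the
# constructor takes (error rate, ksize), add_sequence() consumes a DNA
# string, and len() reports the approximate number of distinct k-mers.
from sourmash.hll import HLL

hll = HLL(0.01, 21)
hll.add_sequence("ACGTACGTACGTACGTACGTACGTA")
print(len(hll))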
lidx = LinearIndex() @@ -111,8 +116,8 @@ class FakeSignature: def minhash(self): raise Exception("don't touch me!") - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss47 = sourmash.load_one_signature(sig47) ss63 = sourmash.load_one_signature(sig63) @@ -142,8 +147,8 @@ def minhash(self): def test_linear_index_search_subj_has_abundance(): # check that search signatures in the index are flattened appropriately. - queryfile = utils.get_test_data('47.fa.sig') - subjfile = utils.get_test_data('track_abund/47.fa.sig') + queryfile = utils.get_test_data("47.fa.sig") + subjfile = utils.get_test_data("track_abund/47.fa.sig") qs = sourmash.load_one_signature(queryfile) ss = sourmash.load_one_signature(subjfile) @@ -159,8 +164,8 @@ def test_linear_index_search_subj_has_abundance(): def test_linear_index_gather_subj_has_abundance(): # check that target signatures in the index are flattened appropriately. - queryfile = utils.get_test_data('47.fa.sig') - subjfile = utils.get_test_data('track_abund/47.fa.sig') + queryfile = utils.get_test_data("47.fa.sig") + subjfile = utils.get_test_data("track_abund/47.fa.sig") qs = sourmash.load_one_signature(queryfile) ss = sourmash.load_one_signature(subjfile) @@ -178,7 +183,9 @@ def test_linear_index_gather_subj_has_abundance(): def test_index_search_subj_scaled_is_lower(): # check that subject sketches are appropriately downsampled for scaled # sketches. - sigfile = utils.get_test_data('scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz') + sigfile = utils.get_test_data( + "scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz" + ) ss = sourmash.load_one_signature(sigfile) # double check :) @@ -201,7 +208,7 @@ def test_index_search_subj_scaled_is_lower(): def test_index_search_subj_num_is_lower(): # check that subject sketches are appropriately downsampled for num # sketches - sigfile = utils.get_test_data('num/47.fa.sig') + sigfile = utils.get_test_data("num/47.fa.sig") ss = sourmash.load_one_signature(sigfile, ksize=31) # double check :) @@ -223,7 +230,7 @@ def test_index_search_subj_num_is_lower(): def test_index_search_query_num_is_lower(): # check that query sketches are appropriately downsampled for num. 
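# A minimal sketch of the LinearIndex pattern these tests build on; ss47 and
# ss63 are assumed to be signatures loaded as in the surrounding tests, and
# search() yields (score, signature, location) results above the threshold.
from sourmash.index import LinearIndex

lidx = LinearIndex()
lidx.insert(ss47)
lidx.insert(ss63)
results = list(lidx.search(ss47, threshold=0.1))
best_score, best_sig, _ = results[0]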
- sigfile = utils.get_test_data('num/47.fa.sig') + sigfile = utils.get_test_data("num/47.fa.sig") qs = sourmash.load_one_signature(sigfile, ksize=31) # double check :) @@ -244,8 +251,8 @@ def test_index_search_query_num_is_lower(): def test_linear_index_search_abund(): # test Index.search_abund - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") ss47 = sourmash.load_one_signature(sig47) ss63 = sourmash.load_one_signature(sig63) @@ -262,8 +269,8 @@ def test_linear_index_search_abund(): def test_linear_index_search_abund_downsample_query(): # test Index.search_abund with query with higher scaled - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") ss47 = sourmash.load_one_signature(sig47) ss63 = sourmash.load_one_signature(sig63) @@ -285,8 +292,8 @@ def test_linear_index_search_abund_downsample_query(): def test_linear_index_search_abund_downsample_subj(): # test Index.search_abund with subj with higher scaled - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") ss47 = sourmash.load_one_signature(sig47) ss63 = sourmash.load_one_signature(sig63) @@ -308,8 +315,8 @@ def test_linear_index_search_abund_downsample_subj(): def test_linear_index_search_abund_requires_threshold(): # test that Index.search_abund requires a 'threshold' - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") ss47 = sourmash.load_one_signature(sig47) ss63 = sourmash.load_one_signature(sig63) @@ -319,15 +326,15 @@ def test_linear_index_search_abund_requires_threshold(): lidx.insert(ss63) with pytest.raises(TypeError) as exc: - results = list(lidx.search_abund(ss47, threshold=None)) + list(lidx.search_abund(ss47, threshold=None)) assert "'search_abund' requires 'threshold'" in str(exc.value) def test_linear_index_search_abund_query_flat(): # test that Index.search_abund requires an abund query sig - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) ss63 = sourmash.load_one_signature(sig63) @@ -337,15 +344,17 @@ def test_linear_index_search_abund_query_flat(): lidx.insert(ss63) with pytest.raises(TypeError) as exc: - results = list(lidx.search_abund(ss47, threshold=0)) + list(lidx.search_abund(ss47, threshold=0)) - assert "'search_abund' requires query signature with abundance information" in str(exc.value) + assert "'search_abund' requires query signature with abundance information" in str( + exc.value + ) def test_linear_index_search_abund_subj_flat(): # test Index.search_abund requires an abund subj - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss47 = sourmash.load_one_signature(sig47) ss63 = sourmash.load_one_signature(sig63) 
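# search_abund(), sketched under the constraints asserted in the tests above:
# it requires an explicit threshold and abundance-weighted (track_abund)
# sketches on both the query and subject sides; lidx and ss47 are assumed
# built as in the tests.
results = list(lidx.search_abund(ss47, threshold=0.1))
for score, match, location in results:
    print(f"{score:.3f}", match.name)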
@@ -355,16 +364,19 @@ def test_linear_index_search_abund_subj_flat(): lidx.insert(ss63) with pytest.raises(TypeError) as exc: - results = list(lidx.search_abund(ss47, threshold=0)) + list(lidx.search_abund(ss47, threshold=0)) - assert "'search_abund' requires subject signatures with abundance information" in str(exc.value) + assert ( + "'search_abund' requires subject signatures with abundance information" + in str(exc.value) + ) def test_linear_index_save(runtmp): # test save output from LinearIndex => JSON - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47) @@ -375,7 +387,7 @@ def test_linear_index_save(runtmp): linear.insert(ss47) linear.insert(ss63) - filename = runtmp.output('foo') + filename = runtmp.output("foo") linear.save(filename) si = set(sourmash.load_file_as_signatures(filename)) @@ -385,24 +397,24 @@ def test_linear_index_save(runtmp): print(len(si)) print(len(x)) - print('si: ', si) - print('x: ', x) + print("si: ", si) + print("x: ", x) assert si == x, si def test_linear_index_load(runtmp): # test .load class method of LinearIndex - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47) ss63 = sourmash.load_one_signature(sig63) - filename = runtmp.output('foo') - with open(filename, 'wt') as fp: + filename = runtmp.output("foo") + with open(filename, "w") as fp: sourmash.save_signatures([ss2, ss47, ss63], fp) linear = LinearIndex.load(filename) @@ -414,9 +426,9 @@ def test_linear_index_load(runtmp): def test_linear_index_save_load(runtmp): # LinearIndex save/load round trip - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47) @@ -427,7 +439,7 @@ def test_linear_index_save_load(runtmp): linear.insert(ss47) linear.insert(ss63) - filename = runtmp.output('foo') + filename = runtmp.output("foo") linear.save(filename) linear2 = LinearIndex.load(filename) @@ -440,9 +452,9 @@ def test_linear_index_save_load(runtmp): def test_linear_gather_threshold_1(): # test gather() method, in some detail - sig2 = load_one_signature(utils.get_test_data('2.fa.sig'), ksize=31) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig'), ksize=31) - sig63 = load_one_signature(utils.get_test_data('63.fa.sig'), ksize=31) + sig2 = load_one_signature(utils.get_test_data("2.fa.sig"), ksize=31) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) linear = LinearIndex() @@ -498,11 +510,11 @@ def test_linear_gather_threshold_1(): def test_linear_gather_threshold_5(): # test gather() method above threshold - sig2 = load_one_signature(utils.get_test_data('2.fa.sig'), ksize=31) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig'), ksize=31) - 
sig63 = load_one_signature(utils.get_test_data('63.fa.sig'), ksize=31) + sig2 = load_one_signature(utils.get_test_data("2.fa.sig"), ksize=31) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) - linear = LinearIndex(filename='foo') + linear = LinearIndex(filename="foo") linear.insert(sig47) linear.insert(sig63) @@ -528,21 +540,20 @@ def test_linear_gather_threshold_5(): containment, match_sig, name = result assert containment == 1.0 assert match_sig == sig2 - assert name == 'foo' + assert name == "foo" # now, check with a threshold_bp that should be meet-able. - result = linear.best_containment(SourmashSignature(new_mh), - threshold_bp=5000) + result = linear.best_containment(SourmashSignature(new_mh), threshold_bp=5000) assert result containment, match_sig, name = result assert containment == 1.0 assert match_sig == sig2 - assert name == 'foo' + assert name == "foo" def test_linear_index_multik_select(): # test that LinearIndx can load multiple (three) ksizes, 21/31/51 - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") siglist = sourmash.load_file_as_signatures(sig2) linear = LinearIndex() @@ -550,17 +561,17 @@ def test_linear_index_multik_select(): linear.insert(ss) # select most specifically - linear2 = linear.select(ksize=31, moltype='DNA') + linear2 = linear.select(ksize=31, moltype="DNA") assert len(linear2) == 1 # all are DNA: - linear2 = linear.select(moltype='DNA') + linear2 = linear.select(moltype="DNA") assert len(linear2) == 3 def test_linear_index_moltype_select(): # this loads two ksizes(21, 10), and two moltypes (DNA and protein) - filename = utils.get_test_data('genome-s10+s11.sig') + filename = utils.get_test_data("genome-s10+s11.sig") siglist = sourmash.load_file_as_signatures(filename) linear = LinearIndex() @@ -568,19 +579,19 @@ def test_linear_index_moltype_select(): linear.insert(ss) # select most specific DNA - linear2 = linear.select(ksize=30, moltype='DNA') + linear2 = linear.select(ksize=30, moltype="DNA") assert len(linear2) == 1 # select most specific protein - linear2 = linear.select(ksize=10, moltype='protein') + linear2 = linear.select(ksize=10, moltype="protein") assert len(linear2) == 1 # can leave off ksize, selects all ksizes - linear2 = linear.select(moltype='DNA') + linear2 = linear.select(moltype="DNA") assert len(linear2) == 2 # can leave off ksize, selects all ksizes - linear2 = linear.select(moltype='protein') + linear2 = linear.select(moltype="protein") assert len(linear2) == 2 # select something impossible @@ -592,7 +603,7 @@ def test_linear_index_picklist_select(): # test LinearIndex.select with a picklist # this loads three ksizes, 21/31/51 - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") siglist = sourmash.load_file_as_signatures(sig2) linear = LinearIndex() @@ -600,22 +611,22 @@ def test_linear_index_picklist_select(): linear.insert(ss) # construct a picklist... 
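# Index.select(), sketched from the tests above: selection narrows an index
# by ksize and/or moltype (and, as below, by picklist) without copying the
# signatures; `linear` is the index built in the surrounding tests.
dna31 = linear.select(ksize=31, moltype="DNA")  # most specific selection
all_dna = linear.select(moltype="DNA")  # every ksize for one moltype
assert len(dna31) <= len(all_dna)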
- picklist = SignaturePicklist('md5prefix8') - picklist.init(['f3a90d4e']) + picklist = SignaturePicklist("md5prefix8") + picklist.init(["f3a90d4e"]) # select on picklist linear2 = linear.select(picklist=picklist) assert len(linear2) == 1 ss = list(linear2.signatures())[0] assert ss.minhash.ksize == 31 - assert ss.md5sum().startswith('f3a90d4e55') + assert ss.md5sum().startswith("f3a90d4e55") def test_linear_index_picklist_select_exclude(): # test select with a picklist, but exclude # this loads three ksizes, 21/31/51 - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") siglist = sourmash.load_file_as_signatures(sig2) linear = LinearIndex() @@ -623,8 +634,8 @@ def test_linear_index_picklist_select_exclude(): linear.insert(ss) # construct a picklist... - picklist = SignaturePicklist('md5prefix8', pickstyle=PickStyle.EXCLUDE) - picklist.init(['f3a90d4e']) + picklist = SignaturePicklist("md5prefix8", pickstyle=PickStyle.EXCLUDE) + picklist.init(["f3a90d4e"]) # select on picklist linear2 = linear.select(picklist=picklist) @@ -634,37 +645,39 @@ def test_linear_index_picklist_select_exclude(): for ss in list(linear2.signatures()): md5s.add(ss.md5sum()) ksizes.add(ss.minhash.ksize) - assert md5s == set(['f372e47893edd349e5956f8b0d8dcbf7','43f3b48e59443092850964d355a20ac0']) - assert ksizes == set([21,51]) + assert md5s == set( + ["f372e47893edd349e5956f8b0d8dcbf7", "43f3b48e59443092850964d355a20ac0"] + ) + assert ksizes == set([21, 51]) def test_index_same_md5sum_fsstorage(runtmp): # check SBT directory 'save' with two signatures that have identical md5 c = runtmp - testdata1 = utils.get_test_data('img/2706795855.sig') - testdata2 = utils.get_test_data('img/638277004.sig') + testdata1 = utils.get_test_data("img/2706795855.sig") + testdata2 = utils.get_test_data("img/638277004.sig") - c.run_sourmash('index', '-k', '21', 'zzz.sbt.json', testdata1, testdata2) + c.run_sourmash("index", "-k", "21", "zzz.sbt.json", testdata1, testdata2) assert c.last_result.status == 0 - outfile = c.output('zzz.sbt.json') + outfile = c.output("zzz.sbt.json") assert os.path.exists(outfile) - storage = c.output('.sbt.zzz') + storage = c.output(".sbt.zzz") assert len(glob.glob(storage + "/*")) == 4 def test_index_same_md5sum_sbt_zipstorage(runtmp): # check SBT zipfile 'save' with two signatures w/identical md5 c = runtmp - testdata1 = utils.get_test_data('img/2706795855.sig') - testdata2 = utils.get_test_data('img/638277004.sig') + testdata1 = utils.get_test_data("img/2706795855.sig") + testdata2 = utils.get_test_data("img/638277004.sig") - c.run_sourmash('index', '-k', '21', 'zzz.sbt.zip', testdata1, testdata2) + c.run_sourmash("index", "-k", "21", "zzz.sbt.zip", testdata1, testdata2) assert c.last_result.status == 0 - outfile = c.output('zzz.sbt.zip') + outfile = c.output("zzz.sbt.zip") assert os.path.exists(outfile) - zout = zipfile.ZipFile(outfile, mode='r') + zout = zipfile.ZipFile(outfile, mode="r") # should have 3 files, 1 internal and two sigs. 
We check for 4 because the # directory also shows in namelist() assert len([f for f in zout.namelist() if f.startswith(".sbt.zzz/")]) == 5 @@ -672,11 +685,11 @@ def test_index_same_md5sum_sbt_zipstorage(runtmp): def test_zipfile_does_not_exist(runtmp): with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('sig', 'describe', 'no-exist.zip') + runtmp.sourmash("sig", "describe", "no-exist.zip") # old behavior, pre PR #1777 - assert 'FileNotFoundError: SOURMASH-MANIFEST.csv' not in str(exc) - assert not os.path.exists(runtmp.output('no-exist.zip')) + assert "FileNotFoundError: SOURMASH-MANIFEST.csv" not in str(exc) + assert not os.path.exists(runtmp.output("no-exist.zip")) # correct behavior assert "ERROR: Error while reading signatures from 'no-exist.zip'." in str(exc) @@ -686,90 +699,102 @@ def test_zipfile_protein_command_search(runtmp): # test command-line search/gather of zipfile with protein sigs c = runtmp - sigfile1 = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/protein.zip') + sigfile1 = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/protein.zip") - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') - assert '2 matches' in c.last_result.out + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") + assert "2 matches" in c.last_result.out - c.run_sourmash('gather', sigfile1, db_out) - assert 'found 1 matches total' in c.last_result.out - assert 'the recovered matches hit 100.0% of the query' in c.last_result.out + c.run_sourmash("gather", sigfile1, db_out) + assert "found 1 matches total" in c.last_result.out + assert "the recovered matches hit 100.0% of the query" in c.last_result.out def test_zipfile_hp_command_search(runtmp): # test command-line search/gather of zipfile with hp sigs c = runtmp - sigfile1 = utils.get_test_data('prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/hp.zip') + sigfile1 = utils.get_test_data( + "prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/hp.zip") - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') - assert '2 matches' in c.last_result.out + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") + assert "2 matches" in c.last_result.out - c.run_sourmash('gather', sigfile1, db_out, '--threshold', '0.0') - assert 'found 1 matches total' in c.last_result.out - assert 'the recovered matches hit 100.0% of the query' in c.last_result.out + c.run_sourmash("gather", sigfile1, db_out, "--threshold", "0.0") + assert "found 1 matches total" in c.last_result.out + assert "the recovered matches hit 100.0% of the query" in c.last_result.out def test_zipfile_dayhoff_command_search(runtmp): # test command-line search/gather of zipfile with dayhoff sigs c = runtmp - sigfile1 = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/dayhoff.zip') + sigfile1 = utils.get_test_data( + "prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/dayhoff.zip") - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') - assert '2 matches' in c.last_result.out + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") + assert "2 matches" in c.last_result.out - c.run_sourmash('gather', sigfile1, db_out, '--threshold', '0.0') - assert 'found 1 matches 
total' in c.last_result.out - assert 'the recovered matches hit 100.0% of the query' in c.last_result.out + c.run_sourmash("gather", sigfile1, db_out, "--threshold", "0.0") + assert "found 1 matches total" in c.last_result.out + assert "the recovered matches hit 100.0% of the query" in c.last_result.out def test_zipfile_protein_command_search_combined(runtmp): # test command-line search/gather of combined zipfile with protein sigs c = runtmp - sigfile1 = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/all.zip') + sigfile1 = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/all.zip") - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') - assert '2 matches' in c.last_result.out + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") + assert "2 matches" in c.last_result.out - c.run_sourmash('gather', sigfile1, db_out) - assert 'found 1 matches total' in c.last_result.out - assert 'the recovered matches hit 100.0% of the query' in c.last_result.out + c.run_sourmash("gather", sigfile1, db_out) + assert "found 1 matches total" in c.last_result.out + assert "the recovered matches hit 100.0% of the query" in c.last_result.out def test_zipfile_hp_command_search_combined(runtmp): # test command-line search/gather of combined zipfile with hp sigs c = runtmp - sigfile1 = utils.get_test_data('prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/all.zip') + sigfile1 = utils.get_test_data( + "prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/all.zip") - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') - assert '2 matches' in c.last_result.out + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") + assert "2 matches" in c.last_result.out - c.run_sourmash('gather', sigfile1, db_out, '--threshold', '0.0') - assert 'found 1 matches total' in c.last_result.out - assert 'the recovered matches hit 100.0% of the query' in c.last_result.out + c.run_sourmash("gather", sigfile1, db_out, "--threshold", "0.0") + assert "found 1 matches total" in c.last_result.out + assert "the recovered matches hit 100.0% of the query" in c.last_result.out def test_zipfile_dayhoff_command_search_combined(runtmp): # test command-line search/gather of combined zipfile with dayhoff sigs c = runtmp - sigfile1 = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/all.zip') + sigfile1 = utils.get_test_data( + "prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/all.zip") - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') - assert '2 matches' in c.last_result.out + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") + assert "2 matches" in c.last_result.out - c.run_sourmash('gather', sigfile1, db_out, '--threshold', '0.0') - assert 'found 1 matches total' in c.last_result.out - assert 'the recovered matches hit 100.0% of the query' in c.last_result.out + c.run_sourmash("gather", sigfile1, db_out, "--threshold", "0.0") + assert "found 1 matches total" in c.last_result.out + assert "the recovered matches hit 100.0% of the query" in c.last_result.out def test_zipfile_dayhoff_command_search_protein(runtmp): @@ -777,21 +802,23 @@ def test_zipfile_dayhoff_command_search_protein(runtmp): c = runtmp # with dayhoff 
query - sigfile1 = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/protein.zip') + sigfile1 = utils.get_test_data( + "prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/protein.zip") - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") print(c.last_result.out) print(c.last_result.err) - assert 'no compatible signatures found in ' in c.last_result.err + assert "no compatible signatures found in " in c.last_result.err def test_zipfile_API_signatures(use_manifest): # return all of the .sig and .sig.gz files in all.zip - zipfile_db = utils.get_test_data('prot/all.zip') + zipfile_db = utils.get_test_data("prot/all.zip") zipidx = ZipFileLinearIndex.load(zipfile_db, use_manifest=use_manifest) siglist = list(zipidx.signatures()) @@ -814,7 +841,7 @@ def __init__(self): pass def signatures(self): - yield 'a' + yield "a" raise Exception("don't touch me!") def __len__(self): @@ -832,10 +859,11 @@ def __len__(self): def test_zipfile_API_signatures_traverse_yield_all(use_manifest): # include dna-sig.noext, but not build.sh (cannot be loaded as signature) - zipfile_db = utils.get_test_data('prot/all.zip') + zipfile_db = utils.get_test_data("prot/all.zip") - zipidx = ZipFileLinearIndex.load(zipfile_db, traverse_yield_all=True, - use_manifest=use_manifest) + zipidx = ZipFileLinearIndex.load( + zipfile_db, traverse_yield_all=True, use_manifest=use_manifest + ) siglist = list(zipidx.signatures()) assert len(siglist) == 8 assert len(zipidx) == 8 @@ -848,11 +876,12 @@ def test_zipfile_API_signatures_traverse_yield_all(use_manifest): def test_zipfile_API_signatures_traverse_yield_all_select(use_manifest): # include dna-sig.noext - zipfile_db = utils.get_test_data('prot/all.zip') + zipfile_db = utils.get_test_data("prot/all.zip") - zipidx = ZipFileLinearIndex.load(zipfile_db, traverse_yield_all=True, - use_manifest=use_manifest) - zipidx = zipidx.select(moltype='DNA') + zipidx = ZipFileLinearIndex.load( + zipfile_db, traverse_yield_all=True, use_manifest=use_manifest + ) + zipidx = zipidx.select(moltype="DNA") siglist = list(zipidx.signatures()) assert len(siglist) == 2 assert len(zipidx) == 2 @@ -860,14 +889,15 @@ def test_zipfile_API_signatures_traverse_yield_all_select(use_manifest): def test_zipfile_API_signatures_traverse_yield_all_manifest(): # check that manifest len is correct - zipfile_db = utils.get_test_data('prot/all.zip') + zipfile_db = utils.get_test_data("prot/all.zip") - zipidx = ZipFileLinearIndex.load(zipfile_db, traverse_yield_all=True, - use_manifest=True) + zipidx = ZipFileLinearIndex.load( + zipfile_db, traverse_yield_all=True, use_manifest=True + ) assert len(zipidx) == 8, len(zipidx) assert len(zipidx.manifest) == 8, len(zipidx.manifest) - zipidx = zipidx.select(moltype='DNA') + zipidx = zipidx.select(moltype="DNA") siglist = list(zipidx.signatures()) assert len(siglist) == 2 assert len(zipidx) == 2 @@ -876,13 +906,13 @@ def test_zipfile_API_signatures_traverse_yield_all_manifest(): def test_zipfile_API_signatures_select(use_manifest): # include dna-sig.noext - zipfile_db = utils.get_test_data('prot/all.zip') + zipfile_db = utils.get_test_data("prot/all.zip") zipidx = ZipFileLinearIndex.load(zipfile_db, use_manifest=use_manifest) ziplist_pre = LinearIndex(zipidx.signatures()) - ziplist_pre = 
ziplist_pre.select(moltype='DNA') + ziplist_pre = ziplist_pre.select(moltype="DNA") - zipidx = zipidx.select(moltype='DNA') + zipidx = zipidx.select(moltype="DNA") siglist = list(zipidx.signatures()) if use_manifest: @@ -897,7 +927,7 @@ def test_zipfile_API_signatures_select(use_manifest): def test_zipfile_API_signatures_select_abund_false(use_manifest): # check for abund=False (all signatures match b/c can convert) - zipfile_db = utils.get_test_data('track_abund/track_abund.zip') + zipfile_db = utils.get_test_data("track_abund/track_abund.zip") zipidx = ZipFileLinearIndex.load(zipfile_db, use_manifest=use_manifest) ziplist_pre = LinearIndex(zipidx.signatures()) @@ -913,7 +943,7 @@ def test_zipfile_API_signatures_select_abund_false(use_manifest): def test_zipfile_API_signatures_select_abund_true(use_manifest): # find all abund=True (all signatures match, b/c abund) - zipfile_db = utils.get_test_data('track_abund/track_abund.zip') + zipfile_db = utils.get_test_data("track_abund/track_abund.zip") zipidx = ZipFileLinearIndex.load(zipfile_db, use_manifest=use_manifest) ziplist_pre = LinearIndex(zipidx.signatures()) @@ -929,7 +959,7 @@ def test_zipfile_API_signatures_select_abund_true(use_manifest): def test_zipfile_API_signatures_select_abund_none(use_manifest): # find all abund=None (all signatures match, b/c no selection criteria) - zipfile_db = utils.get_test_data('track_abund/track_abund.zip') + zipfile_db = utils.get_test_data("track_abund/track_abund.zip") zipidx = ZipFileLinearIndex.load(zipfile_db, use_manifest=use_manifest) ziplist_pre = LinearIndex(zipidx.signatures()) @@ -945,14 +975,14 @@ def test_zipfile_API_signatures_select_abund_none(use_manifest): def test_zipfile_API_signatures_select_twice(use_manifest): # include dna-sig.noext - zipfile_db = utils.get_test_data('prot/all.zip') + zipfile_db = utils.get_test_data("prot/all.zip") zipidx = ZipFileLinearIndex.load(zipfile_db, use_manifest=use_manifest) ziplist_pre = LinearIndex(zipidx.signatures()) - ziplist_pre = ziplist_pre.select(moltype='DNA') + ziplist_pre = ziplist_pre.select(moltype="DNA") ziplist_pre = ziplist_pre.select(ksize=31) - zipidx = zipidx.select(moltype='DNA') + zipidx = zipidx.select(moltype="DNA") zipidx = zipidx.select(ksize=31) siglist = list(zipidx.signatures()) @@ -968,17 +998,17 @@ def test_zipfile_API_signatures_select_twice(use_manifest): def test_zipfile_API_save(): # ZipFileLinearIndex.save is not implemented. - zipfile_db = utils.get_test_data('prot/all.zip') + zipfile_db = utils.get_test_data("prot/all.zip") zipidx = ZipFileLinearIndex.load(zipfile_db) with pytest.raises(NotImplementedError): - zipidx.save('xxx') + zipidx.save("xxx") def test_zipfile_API_insert(): # ZipFileLinearIndex.insert is not implemented. 
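# ZipFileLinearIndex, sketched from the tests above: a read-only index over
# a .zip collection, loaded from a path and narrowed with select(); save()
# and insert() deliberately raise NotImplementedError. The path below is
# illustrative.
from sourmash.index import ZipFileLinearIndex

zipidx = ZipFileLinearIndex.load("prot/all.zip")
zipidx = zipidx.select(moltype="DNA", ksize=31)
for ss in zipidx.signatures():
    print(ss.name)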
- zipfile_db = utils.get_test_data('prot/all.zip') + zipfile_db = utils.get_test_data("prot/all.zip") zipidx = ZipFileLinearIndex.load(zipfile_db) @@ -989,7 +1019,7 @@ def test_zipfile_API_insert(): def test_zipfile_API_location(use_manifest): # test ZipFileLinearIndex.location property - zipfile_db = utils.get_test_data('prot/all.zip') + zipfile_db = utils.get_test_data("prot/all.zip") zipidx = ZipFileLinearIndex.load(zipfile_db, use_manifest=use_manifest) @@ -1000,9 +1030,8 @@ def test_zipfile_load_file_as_signatures(use_manifest): # make sure that ZipFileLinearIndex.signatures works, and is generator from types import GeneratorType - zipfile_db = utils.get_test_data('prot/all.zip') - sigs = sourmash_args.load_file_as_signatures(zipfile_db, - _use_manifest=use_manifest) + zipfile_db = utils.get_test_data("prot/all.zip") + sigs = sourmash_args.load_file_as_signatures(zipfile_db, _use_manifest=use_manifest) # it's fine if this needs to change, but for now I want to make # sure that this is a generator. @@ -1019,10 +1048,10 @@ def test_zipfile_load_file_as_signatures_traverse_yield_all(use_manifest): # test with --force, which loads all files from types import GeneratorType - zipfile_db = utils.get_test_data('prot/all.zip') - sigs = sourmash_args.load_file_as_signatures(zipfile_db, - yield_all_files=True, - _use_manifest=use_manifest) + zipfile_db = utils.get_test_data("prot/all.zip") + sigs = sourmash_args.load_file_as_signatures( + zipfile_db, yield_all_files=True, _use_manifest=use_manifest + ) # it's fine if this needs to change, but for now I want to make # sure that this is a generator. @@ -1036,21 +1065,21 @@ def test_zipfile_load_database_fail_if_not_zip(runtmp): # fail _load_database if not .zip c = runtmp - zipfile_db = utils.get_test_data('prot/all.zip') - badname = c.output('xyz.nada') + zipfile_db = utils.get_test_data("prot/all.zip") + badname = c.output("xyz.nada") shutil.copyfile(zipfile_db, badname) with pytest.raises(ValueError) as exc: - sigs = sourmash_args.load_file_as_signatures(badname) + sourmash_args.load_file_as_signatures(badname) - assert 'Error while reading signatures from' in str(exc.value) + assert "Error while reading signatures from" in str(exc.value) def test_multi_index_search(): # test MultiIndex.search - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47) @@ -1061,8 +1090,7 @@ def test_multi_index_search(): lidx3 = LinearIndex.load(sig63) # create MultiIndex with source location override - lidx = MultiIndex.load([lidx1, lidx2, lidx3], ['A', None, 'C'], - None) + lidx = MultiIndex.load([lidx1, lidx2, lidx3], ["A", None, "C"], None) lidx = lidx.select(ksize=31) # now, search for sig2 @@ -1070,7 +1098,7 @@ def test_multi_index_search(): print([s[1].name for s in sr]) assert len(sr) == 1 assert sr[0][1] == ss2 - assert sr[0][2] == 'A' # source override + assert sr[0][2] == "A" # source override # search for sig47 with lower threshold; search order not guaranteed. 
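# MultiIndex, sketched from this test: load() wraps child indexes with
# optional per-child source overrides (None keeps each signature's own
# location), and search results carry the override in the location slot.
from sourmash.index import MultiIndex

mi = MultiIndex.load([lidx1, lidx2, lidx3], ["A", None, "C"], None)
mi = mi.select(ksize=31)
sr = mi.search(ss2, threshold=1.0)
assert sr[0][2] == "A"  # overridden source location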
sr = lidx.search(ss47, threshold=0.1) @@ -1078,9 +1106,9 @@ def test_multi_index_search(): assert len(sr) == 2 sr.sort(key=lambda x: -x[0]) assert sr[0][1] == ss47 - assert sr[0][2] == sig47 # source was set to None, so no override + assert sr[0][2] == sig47 # source was set to None, so no override assert sr[1][1] == ss63 - assert sr[1][2] == 'C' # source override + assert sr[1][2] == "C" # source override # search for sig63 with lower threshold; search order not guaranteed. sr = lidx.search(ss63, threshold=0.1) @@ -1088,9 +1116,9 @@ def test_multi_index_search(): assert len(sr) == 2 sr.sort(key=lambda x: -x[0]) assert sr[0][1] == ss63 - assert sr[0][2] == 'C' # source override + assert sr[0][2] == "C" # source override assert sr[1][1] == ss47 - assert sr[1][2] == sig47 # source was set to None, so no override + assert sr[1][2] == sig47 # source was set to None, so no override # search for sig63 with high threshold => 1 match sr = lidx.search(ss63, threshold=0.8) @@ -1098,45 +1126,44 @@ def test_multi_index_search(): assert len(sr) == 1 sr.sort(key=lambda x: -x[0]) assert sr[0][1] == ss63 - assert sr[0][2] == 'C' # source override + assert sr[0][2] == "C" # source override def test_multi_index_gather(): # test MultiIndex.best_containment - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + sourmash.load_one_signature(sig63) lidx1 = LinearIndex.load(sig2) lidx2 = LinearIndex.load(sig47) lidx3 = LinearIndex.load(sig63) # create MultiIndex with source location override - lidx = MultiIndex.load([lidx1, lidx2, lidx3], ['A', None, 'C'], - None) + lidx = MultiIndex.load([lidx1, lidx2, lidx3], ["A", None, "C"], None) lidx = lidx.select(ksize=31) match = lidx.best_containment(ss2) assert match assert match.score == 1.0 - assert match.location == 'A' + assert match.location == "A" match = lidx.best_containment(ss47) assert match assert match.score == 1.0 assert match.signature == ss47 - assert match.location == sig47 # no source override + assert match.location == sig47 # no source override def test_multi_index_signatures(): # test MultiIndex.signatures - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47) @@ -1147,8 +1174,7 @@ def test_multi_index_signatures(): lidx3 = LinearIndex.load(sig63) # create MultiIndex with source location override - lidx = MultiIndex.load([lidx1, lidx2, lidx3], ['A', None, 'C'], - None) + lidx = MultiIndex.load([lidx1, lidx2, lidx3], ["A", None, "C"], None) lidx = lidx.select(ksize=31) siglist = list(lidx.signatures()) @@ -1168,13 +1194,13 @@ def test_multi_index_create_prepend(): # test MultiIndex constructor - location must be specified if # 'prepend_location is True with pytest.raises(ValueError): - mi = MultiIndex(None, None, prepend_location=True) + MultiIndex(None, None, prepend_location=True) def test_multi_index_load_from_directory(): # test MultiIndex loading from a directory. 
The full paths to the # signature files should be available via 'signatures_with_location()' - dirname = utils.get_test_data('prot/protein') + dirname = utils.get_test_data("prot/protein") mi = MultiIndex.load_from_directory(dirname, force=False) assert mi.location == dirname @@ -1183,10 +1209,12 @@ def test_multi_index_load_from_directory(): assert len(sigs) == 2 # check to make sure that full paths to expected sig files are returned - locs = [ x[1] for x in mi.signatures_with_location() ] + locs = [x[1] for x in mi.signatures_with_location()] - endings = ('GCA_001593925.1_ASM159392v1_protein.faa.gz.sig', - 'GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') + endings = ( + "GCA_001593925.1_ASM159392v1_protein.faa.gz.sig", + "GCA_001593935.1_ASM159393v1_protein.faa.gz.sig", + ) for loc in locs: found = False for end in endings: @@ -1195,16 +1223,16 @@ def test_multi_index_load_from_directory(): assert found, f"could not find full filename in locations for {end}" # also check internal locations and parent value -- - assert mi.parent.endswith('prot/protein') + assert mi.parent.endswith("prot/protein") - ilocs = [ x[1] for x in mi._signatures_with_internal() ] + ilocs = [x[1] for x in mi._signatures_with_internal()] assert endings[0] in ilocs, ilocs assert endings[1] in ilocs, ilocs def test_multi_index_load_from_directory_2(): # only load .sig files, currently; not the databases under that directory. - dirname = utils.get_test_data('prot') + dirname = utils.get_test_data("prot") mi = MultiIndex.load_from_directory(dirname, force=False) sigs = list(mi.signatures()) @@ -1214,13 +1242,12 @@ def test_multi_index_load_from_directory_2(): def test_multi_index_load_from_directory_3_simple_bad_file(runtmp): # check that force=False fails properly when confronted with non-JSON # files. - c = runtmp - with open(runtmp.output('badsig.sig'), 'wt') as fp: - fp.write('bad content.') + with open(runtmp.output("badsig.sig"), "w") as fp: + fp.write("bad content.") with pytest.raises(ValueError): - mi = MultiIndex.load_from_directory(runtmp.location, force=False) + MultiIndex.load_from_directory(runtmp.location, force=False) def test_multi_index_load_from_directory_3(runtmp): @@ -1228,7 +1255,7 @@ def test_multi_index_load_from_directory_3(runtmp): # files that are legit sourmash files... c = runtmp - dirname = utils.get_test_data('prot') + dirname = utils.get_test_data("prot") count = 0 for root, dirs, files in os.walk(dirname): @@ -1240,7 +1267,7 @@ def test_multi_index_load_from_directory_3(runtmp): count += 1 with pytest.raises(ValueError): - mi = MultiIndex.load_from_directory(c.location, force=False) + MultiIndex.load_from_directory(c.location, force=False) def test_multi_index_load_from_directory_3_yield_all_true(runtmp): @@ -1248,7 +1275,7 @@ def test_multi_index_load_from_directory_3_yield_all_true(runtmp): # Note here that only .sig/.sig.gz files are loaded. c = runtmp - dirname = utils.get_test_data('prot') + dirname = utils.get_test_data("prot") count = 0 for root, dirs, files in os.walk(dirname): @@ -1269,7 +1296,7 @@ def test_multi_index_load_from_directory_3_yield_all_true_subdir(runtmp): # check that force works ok on subdirectories. # Note here that only .sig/.sig.gz files are loaded. 
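The load_from_directory tests above and the force variants below encode two rules: with force=False, any unparseable file under the tree raises ValueError, while force=True skips what it cannot load and only picks up .sig/.sig.gz files. In sketch form:

    from sourmash.index import MultiIndex
    import sourmash_tst_utils as utils

    dirname = utils.get_test_data("prot/protein")

    # strict: raises ValueError if a non-signature file is encountered
    mi = MultiIndex.load_from_directory(dirname, force=False)
    assert len(list(mi.signatures())) == 2  # per the test above

    # permissive: tolerates stray files, still loads only .sig/.sig.gz
    mi = MultiIndex.load_from_directory(dirname, force=True)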
c = runtmp - dirname = utils.get_test_data('prot') + dirname = utils.get_test_data("prot") target_dir = c.output("some_subdir") os.mkdir(target_dir) @@ -1285,7 +1312,7 @@ def test_multi_index_load_from_directory_3_yield_all_true_subdir(runtmp): mi = MultiIndex.load_from_directory(c.location, force=True) - locations = set([ row['internal_location'] for row in mi.manifest.rows ]) + locations = set([row["internal_location"] for row in mi.manifest.rows]) print(locations) sigs = list(mi.signatures()) @@ -1296,12 +1323,12 @@ def test_multi_index_load_from_directory_3_sig_gz(runtmp): # check that we find .sig.gz files, too c = runtmp - dirname = utils.get_test_data('prot') + dirname = utils.get_test_data("prot") count = 0 for root, dirs, files in os.walk(dirname): for name in files: - if not name.endswith('.sig'): # skip non .sig things + if not name.endswith(".sig"): # skip non .sig things continue print(f"at {name}") fullname = os.path.join(root, name) @@ -1321,26 +1348,25 @@ def test_multi_index_load_from_directory_3_check_traverse_fn(runtmp): # test the actual traverse function... eventually this test can be # removed, probably, as we consolidate functionality and test MultiIndex # better. - c = runtmp - dirname = utils.get_test_data('prot') + dirname = utils.get_test_data("prot") files = list(sourmash_args.traverse_find_sigs([dirname])) assert len(files) == 7, files files = list(sourmash_args.traverse_find_sigs([dirname], True)) - assert len(files) == 20, files # if this fails, check for extra files! + assert len(files) == 20, files # if this fails, check for extra files! def test_multi_index_load_from_directory_no_exist(): # raise ValueError on files that don't exist in load_from_directory - dirname = utils.get_test_data('does-not-exist') + dirname = utils.get_test_data("does-not-exist") with pytest.raises(ValueError): - mi = MultiIndex.load_from_directory(dirname, force=True) + MultiIndex.load_from_directory(dirname, force=True) def test_multi_index_load_from_file_path(): # test that MultiIndex.load_from_path works fine - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") mi = MultiIndex.load_from_path(sig2) assert len(mi) == 3 @@ -1349,29 +1375,29 @@ def test_multi_index_load_from_file_path(): def test_multi_index_load_from_file_path_no_exist(): # test that load_from_path fails on non-existent files - filename = utils.get_test_data('does-not-exist') + filename = utils.get_test_data("does-not-exist") with pytest.raises(ValueError): - mi = MultiIndex.load_from_directory(filename, force=True) + MultiIndex.load_from_directory(filename, force=True) def test_multi_index_load_from_pathlist_no_exist(): # test that load_from_pathlist fails on non-existent files - dirname = utils.get_test_data('does-not-exist') + dirname = utils.get_test_data("does-not-exist") with pytest.raises(ValueError): - mi = MultiIndex.load_from_pathlist(dirname) + MultiIndex.load_from_pathlist(dirname) def test_multi_index_load_from_pathlist_1(runtmp): # test functionality of MultiIndex.load_from_pathlist with .sig files c = runtmp - dirname = utils.get_test_data('prot') + dirname = utils.get_test_data("prot") files = list(sourmash_args.traverse_find_sigs([dirname])) assert len(files) == 7, files - file_list = c.output('filelist.txt') + file_list = c.output("filelist.txt") - with open(file_list, 'wt') as fp: + with open(file_list, "w") as fp: print("\n".join(files), file=fp) mi = MultiIndex.load_from_pathlist(file_list) @@ -1388,54 +1414,57 @@ def test_multi_index_load_from_pathlist_2(runtmp): # 
CTB note: if you create extra files under this directory, # it will fail :) c = runtmp - dirname = utils.get_test_data('prot') + dirname = utils.get_test_data("prot") files = list(sourmash_args.traverse_find_sigs([dirname], True)) - assert len(files) == 20, files # check there aren't extra files in here! + assert len(files) == 20, files # check there aren't extra files in here! - file_list = c.output('filelist.txt') + file_list = c.output("filelist.txt") - with open(file_list, 'wt') as fp: + with open(file_list, "w") as fp: print("\n".join(files), file=fp) with pytest.raises(ValueError) as exc: - mi = MultiIndex.load_from_pathlist(file_list) + MultiIndex.load_from_pathlist(file_list) print(str(exc)) - assert 'Error while reading signatures from' in str(exc) + assert "Error while reading signatures from" in str(exc) def test_multi_index_load_from_pathlist_3_zipfile(runtmp): # can we load zipfiles in a pathlist? yes please. c = runtmp - zipfile = utils.get_test_data('prot/all.zip') + zipfile = utils.get_test_data("prot/all.zip") - file_list = c.output('filelist.txt') + file_list = c.output("filelist.txt") - with open(file_list, 'wt') as fp: + with open(file_list, "w") as fp: print(zipfile, file=fp) mi = MultiIndex.load_from_pathlist(file_list) assert len(mi) == 8 + ## ## test a slightly outre version of JaccardSearch - this is a test of the ## JaccardSearch 'collect' protocol, in particular... ## + class JaccardSearchBestOnly_ButIgnore(JaccardSearch): "A class that ignores certain results, but still does all the pruning." + def __init__(self, ignore_list): super().__init__(SearchType.JACCARD, threshold=0.1) self.ignore_list = ignore_list # a collect function that _ignores_ things in the ignore_list def collect(self, score, match): - print('in collect; current threshold:', self.threshold) + print("in collect; current threshold:", self.threshold) for q in self.ignore_list: - print('ZZZ', match, match.similarity(q)) + print("ZZZ", match, match.similarity(q)) if match.similarity(q) == 1.0: - print('yes, found.') + print("yes, found.") return False # update threshold if not perfect match, which could help prune. @@ -1445,9 +1474,9 @@ def collect(self, score, match): def test_linear_index_gather_ignore(): # do we properly ignore exact matches in 'search' for LinearIndex? - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47, ksize=31) @@ -1460,7 +1489,7 @@ def test_linear_index_gather_ignore(): search_fn = JaccardSearchBestOnly_ButIgnore([ss47]) results = list(lidx.find(search_fn, ss47)) - results = [ sr.signature for sr in results ] + results = [sr.signature for sr in results] def is_found(ss, xx): for q in xx: @@ -1478,9 +1507,9 @@ def test_lca_index_gather_ignore(): # do we properly ignore exact matches in gather on an LCA DB? 
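JaccardSearchBestOnly_ButIgnore above is the interesting part of this hunk: the collect() hook lets a JaccardSearch subclass veto individual matches (by returning False) while still updating the threshold used for pruning. The gather-ignore tests that follow all use it the same way; a sketch against a LinearIndex, reusing the class defined above:

    import sourmash
    from sourmash.index import LinearIndex
    import sourmash_tst_utils as utils

    sig47 = utils.get_test_data("47.fa.sig")
    ss47 = sourmash.load_one_signature(sig47, ksize=31)

    lidx = LinearIndex.load(sig47)

    # ignore the exact match to ss47; with only sig47 loaded,
    # this should come back empty.
    search_fn = JaccardSearchBestOnly_ButIgnore([ss47])
    results = [sr.signature for sr in lidx.find(search_fn, ss47)]
    assert not results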
from sourmash.lca import LCA_Database - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47, ksize=31) @@ -1496,7 +1525,7 @@ def test_lca_index_gather_ignore(): search_fn = JaccardSearchBestOnly_ButIgnore([ss47]) results = list(db.find(search_fn, ss47)) - results = [ sr.signature for sr in results ] + results = [sr.signature for sr in results] def is_found(ss, xx): for q in xx: @@ -1512,9 +1541,9 @@ def is_found(ss, xx): def test_sbt_index_gather_ignore(): # do we properly ignore exact matches in gather on an SBT? - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47, ksize=31) @@ -1529,15 +1558,15 @@ def test_sbt_index_gather_ignore(): db.insert(ss63) # ...now search with something that should ignore sig47, the exact match. - print(f'\n** trying to ignore {ss47}') + print(f"\n** trying to ignore {ss47}") search_fn = JaccardSearchBestOnly_ButIgnore([ss47]) results = list(db.find(search_fn, ss47)) - results = [ sr.signature for sr in results ] + results = [sr.signature for sr in results] def is_found(ss, xx): for q in xx: - print('is found?', ss, ss.similarity(q)) + print("is found?", ss, ss.similarity(q)) if ss.similarity(q) == 1.0: return True return False @@ -1552,39 +1581,41 @@ def test_counter_gather_test_consume(): # (see test_index_protocol.py for generic CounterGather tests.) query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") match_mh_1 = query_mh.copy_and_clear() match_mh_1.add_many(range(0, 10)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") match_mh_2 = query_mh.copy_and_clear() match_mh_2.add_many(range(7, 15)) - match_ss_2 = SourmashSignature(match_mh_2, name='match2') + match_ss_2 = SourmashSignature(match_mh_2, name="match2") match_mh_3 = query_mh.copy_and_clear() match_mh_3.add_many(range(13, 17)) - match_ss_3 = SourmashSignature(match_mh_3, name='match3') + match_ss_3 = SourmashSignature(match_mh_3, name="match3") # load up the counter counter = CounterGather(query_ss) - counter.add(match_ss_1, location='loc a') - counter.add(match_ss_2, location='loc b') - counter.add(match_ss_3, location='loc c') + counter.add(match_ss_1, location="loc a") + counter.add(match_ss_2, location="loc b") + counter.add(match_ss_3, location="loc c") ### ok, dig into actual counts... 
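Before the round-by-round asserts below, it may help to see the whole peek/consume contract in one place. This is a sketch of the loop the rounds step through by hand; the query-shrinking step (to_mutable()/remove_many()) is an assumption about one reasonable way to update the query between rounds, not necessarily what the tests' helper does:

    import sourmash
    from sourmash import SourmashSignature
    from sourmash.index import CounterGather

    query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1)
    query_mh.add_many(range(0, 20))
    query_ss = SourmashSignature(query_mh, name="query")

    match_mh = query_mh.copy_and_clear()
    match_mh.add_many(range(0, 10))

    counter = CounterGather(query_ss)
    counter.add(SourmashSignature(match_mh, name="match1"), location="loc a")

    cur_query = query_ss.minhash
    while True:
        result = counter.peek(cur_query)
        if not result:                      # empty => gather is done
            break
        sr, intersect_mh = result
        print(sr.location, len(intersect_mh))
        counter.consume(intersect_mh)       # update internal counters
        cur_query = cur_query.to_mutable()
        cur_query.remove_many(intersect_mh.hashes)  # shrink the query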
import pprint + pprint.pprint(counter.counter) pprint.pprint(list(counter.signatures())) pprint.pprint(counter.locations) assert set(counter.signatures()) == set([match_ss_1, match_ss_2, match_ss_3]) - assert list(sorted(counter.locations.values())) == ['loc a', 'loc b', 'loc c'] + assert list(sorted(counter.locations.values())) == ["loc a", "loc b", "loc c"] pprint.pprint(counter.counter.most_common()) - assert list(counter.counter.most_common()) == \ - [('26d4943627b33c446f37be1f5baf8d46', 10), - ('f51cedec90ea666e0ebc11aa274eca61', 8), - ('f331f8279113d77e42ab8efca8f9cc17', 4)] + assert list(counter.counter.most_common()) == [ + ("26d4943627b33c446f37be1f5baf8d46", 10), + ("f51cedec90ea666e0ebc11aa274eca61", 8), + ("f331f8279113d77e42ab8efca8f9cc17", 4), + ] ## round 1 @@ -1595,12 +1626,13 @@ def test_counter_gather_test_consume(): assert cur_query == query_ss.minhash counter.consume(intersect_mh) - assert set(counter.signatures()) == set([ match_ss_1, match_ss_2, match_ss_3 ]) - assert list(sorted(counter.locations.values())) == ['loc a', 'loc b', 'loc c'] + assert set(counter.signatures()) == set([match_ss_1, match_ss_2, match_ss_3]) + assert list(sorted(counter.locations.values())) == ["loc a", "loc b", "loc c"] pprint.pprint(counter.counter.most_common()) - assert list(counter.counter.most_common()) == \ - [('f51cedec90ea666e0ebc11aa274eca61', 5), - ('f331f8279113d77e42ab8efca8f9cc17', 4)] + assert list(counter.counter.most_common()) == [ + ("f51cedec90ea666e0ebc11aa274eca61", 5), + ("f331f8279113d77e42ab8efca8f9cc17", 4), + ] ### round 2 @@ -1611,12 +1643,13 @@ def test_counter_gather_test_consume(): assert cur_query != query_ss.minhash counter.consume(intersect_mh) - assert set(counter.signatures()) == set([ match_ss_1, match_ss_2, match_ss_3 ]) - assert list(sorted(counter.locations.values())) == ['loc a', 'loc b', 'loc c'] + assert set(counter.signatures()) == set([match_ss_1, match_ss_2, match_ss_3]) + assert list(sorted(counter.locations.values())) == ["loc a", "loc b", "loc c"] pprint.pprint(counter.counter.most_common()) - assert list(counter.counter.most_common()) == \ - [('f331f8279113d77e42ab8efca8f9cc17', 2)] + assert list(counter.counter.most_common()) == [ + ("f331f8279113d77e42ab8efca8f9cc17", 2) + ] ## round 3 @@ -1627,8 +1660,8 @@ def test_counter_gather_test_consume(): assert cur_query != query_ss.minhash counter.consume(intersect_mh) - assert set(counter.signatures()) == set([ match_ss_1, match_ss_2, match_ss_3 ]) - assert list(sorted(counter.locations.values())) == ['loc a', 'loc b', 'loc c'] + assert set(counter.signatures()) == set([match_ss_1, match_ss_2, match_ss_3]) + assert list(sorted(counter.locations.values())) == ["loc a", "loc b", "loc c"] pprint.pprint(counter.counter.most_common()) assert list(counter.counter.most_common()) == [] @@ -1639,8 +1672,8 @@ def test_counter_gather_test_consume(): assert not results counter.consume(intersect_mh) - assert set(counter.signatures()) == set([ match_ss_1, match_ss_2, match_ss_3 ]) - assert list(sorted(counter.locations.values())) == ['loc a', 'loc b', 'loc c'] + assert set(counter.signatures()) == set([match_ss_1, match_ss_2, match_ss_3]) + assert list(sorted(counter.locations.values())) == ["loc a", "loc b", "loc c"] assert list(counter.counter.most_common()) == [] @@ -1649,28 +1682,28 @@ def test_counter_gather_identical_md5sum(): # check what happens with identical matches w/different names query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = 
SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") match_mh_1 = query_mh.copy_and_clear() match_mh_1.add_many(range(0, 10)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") # same as match_mh_1 match_mh_2 = query_mh.copy_and_clear() match_mh_2.add_many(range(0, 10)) - match_ss_2 = SourmashSignature(match_mh_2, name='match2') + match_ss_2 = SourmashSignature(match_mh_2, name="match2") # identical md5sum assert match_ss_1.md5sum() == match_ss_2.md5sum() # load up the counter counter = CounterGather(query_ss) - counter.add(match_ss_1, location='loc a') - counter.add(match_ss_2, location='loc b') + counter.add(match_ss_1, location="loc a") + counter.add(match_ss_2, location="loc b") assert len(counter.siglist) == 1 stored_match = list(counter.siglist.values()).pop() - assert stored_match.name == 'match2' + assert stored_match.name == "match2" # CTB note: this behavior may be changed freely, as the protocol # tests simply specify that _one_ of the identical matches is # returned. See test_counter_gather_multiple_identical_matches. @@ -1678,9 +1711,9 @@ def test_counter_gather_identical_md5sum(): def test_lazy_index_1(): # test some basic features of LazyLinearIndex - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47) @@ -1735,14 +1768,14 @@ def minhash(self): lazy = LazyLinearIndex(lidx) lazy2 = lazy.select(ksize=31) with pytest.raises(ValueError) as e: - lazy3 = lazy2.select(ksize=21) + lazy2.select(ksize=21) assert str(e.value) == "cannot select on two different values for ksize" def test_lazy_index_4_bool(): # test some basic features of LazyLinearIndex - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) # test bool false/true @@ -1757,24 +1790,26 @@ def test_lazy_index_4_bool(): def test_lazy_index_wraps_multi_index_location(): # check that 'location' works fine when MultiIndex is wrapped by # LazyLinearIndex. 
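The LazyLinearIndex tests above pin down two behaviors: select() is recorded lazily and can be stacked, but re-selecting the same field with a different value raises ValueError. Distilled:

    import pytest
    import sourmash
    from sourmash.index import LinearIndex, LazyLinearIndex
    import sourmash_tst_utils as utils

    sig2 = utils.get_test_data("2.fa.sig")
    lazy = LazyLinearIndex(LinearIndex.load(sig2))

    lazy2 = lazy.select(ksize=31)  # fine: recorded, not applied yet

    with pytest.raises(ValueError):
        lazy2.select(ksize=21)     # conflicting re-selection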
- sigdir = utils.get_test_data('prot/protein/') - sigzip = utils.get_test_data('prot/protein.zip') - siglca = utils.get_test_data('prot/protein.lca.json.gz') - sigsbt = utils.get_test_data('prot/protein.sbt.zip') + sigdir = utils.get_test_data("prot/protein/") + sigzip = utils.get_test_data("prot/protein.zip") + siglca = utils.get_test_data("prot/protein.lca.json.gz") + sigsbt = utils.get_test_data("prot/protein.sbt.zip") db_paths = (sigdir, sigzip, siglca, sigsbt) - dbs = [ sourmash.load_file_as_index(db_path) for db_path in db_paths ] + dbs = [sourmash.load_file_as_index(db_path) for db_path in db_paths] mi = MultiIndex.load(dbs, db_paths, None) lazy = LazyLinearIndex(mi) - mi2 = mi.select(moltype='protein') - lazy2 = lazy.select(moltype='protein') + mi2 = mi.select(moltype="protein") + lazy2 = lazy.select(moltype="protein") - for (ss_tup, ss_lazy_tup) in zip(mi2.signatures_with_location(), - lazy2.signatures_with_location()): + for ss_tup, ss_lazy_tup in zip( + mi2.signatures_with_location(), lazy2.signatures_with_location() + ): assert ss_tup == ss_lazy_tup + def test_revindex_index_search(): # confirm that RevIndex works sig2 = utils.get_test_data("2.fa.sig") @@ -1848,9 +1883,9 @@ def test_revindex_gather(): def test_revindex_gather_ignore(): # check that RevIndex gather ignores things properly. - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47, ksize=31) @@ -1863,7 +1898,7 @@ def test_revindex_gather_ignore(): search_fn = JaccardSearchBestOnly_ButIgnore([ss47]) results = list(lidx.find(search_fn, ss47)) - results = [ ss.signature for ss in results ] + results = [ss.signature for ss in results] def is_found(ss, xx): for q in xx: @@ -1881,8 +1916,8 @@ def test_standalone_manifest_signatures(runtmp): # build a StandaloneManifestIndex and test 'signatures' method. ## first, build a manifest in memory using MultiIndex - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss47 = sourmash.load_one_signature(sig47) ss63 = sourmash.load_one_signature(sig63) @@ -1895,7 +1930,7 @@ def test_standalone_manifest_signatures(runtmp): ## got a manifest! 
ok, now test out StandaloneManifestIndex mm = StandaloneManifestIndex(mi.manifest, None) - siglist = [ ss for ss in mm.signatures() ] + siglist = [ss for ss in mm.signatures()] assert len(siglist) == 2 assert ss47 in siglist assert ss63 in siglist @@ -1905,11 +1940,11 @@ def test_standalone_manifest_signatures_prefix(runtmp): # try out 'prefix' for StandaloneManifestIndex ## first, build a manifest in memory using MultiIndex - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + sourmash.load_one_signature(sig47) + sourmash.load_one_signature(sig63) lidx1 = LinearIndex.load(sig47) lidx2 = LinearIndex.load(sig63) @@ -1917,11 +1952,10 @@ def test_standalone_manifest_signatures_prefix(runtmp): # ok, now remove the abspath prefix from iloc for row in mi.manifest.rows: - row['internal_location'] = os.path.basename(row['internal_location']) + row["internal_location"] = os.path.basename(row["internal_location"]) ## this should succeed! - mm = StandaloneManifestIndex(mi.manifest, None, - prefix=utils.get_test_data('')) + mm = StandaloneManifestIndex(mi.manifest, None, prefix=utils.get_test_data("")) assert len(list(mm.signatures())) == 2 @@ -1930,25 +1964,24 @@ def test_standalone_manifest_signatures_prefix_fail(runtmp): # give StandaloneManifest the wrong prefix ## first, build a manifest in memory using MultiIndex - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + sourmash.load_one_signature(sig47) + sourmash.load_one_signature(sig63) lidx1 = LinearIndex.load(sig47) lidx2 = LinearIndex.load(sig63) - print('XXX', lidx1.location) + print("XXX", lidx1.location) mi = MultiIndex.load([lidx1, lidx2], [sig47, sig63], "") # remove prefix from manifest for row in mi.manifest.rows: - row['internal_location'] = os.path.basename(row['internal_location']) + row["internal_location"] = os.path.basename(row["internal_location"]) ## got a manifest! 
ok, now test out StandaloneManifestIndex - mm = StandaloneManifestIndex(mi.manifest, None, - prefix=runtmp.output('foo')) + mm = StandaloneManifestIndex(mi.manifest, None, prefix=runtmp.output("foo")) # should fail with pytest.raises(ValueError) as exc: @@ -1960,37 +1993,37 @@ def test_standalone_manifest_signatures_prefix_fail(runtmp): def test_standalone_manifest_load_from_dir(runtmp): # test loading a mf with relative directory paths from test-data - mf = utils.get_test_data('scaled/mf.csv') + mf = utils.get_test_data("scaled/mf.csv") idx = sourmash.load_file_as_index(mf) siglist = list(idx.signatures()) assert len(siglist) == 15 - assert idx # should be 'True' + assert idx # should be 'True' assert len(idx) == 15 with pytest.raises(NotImplementedError): idx.insert() with pytest.raises(NotImplementedError): - idx.save('foo') + idx.save("foo") assert idx.location == mf def test_standalone_manifest_lazy_load(runtmp): # check that it's actually doing lazy loading - orig_sig47 = utils.get_test_data('47.fa.sig') - sig47 = runtmp.output('47.fa.sig') + orig_sig47 = utils.get_test_data("47.fa.sig") + sig47 = runtmp.output("47.fa.sig") # build an external manifest shutil.copyfile(orig_sig47, sig47) # this is an abspath to sig47 - runtmp.sourmash('sig', 'manifest', sig47, '-o', 'mf.csv') + runtmp.sourmash("sig", "manifest", sig47, "-o", "mf.csv") # should work to get signatures: - idx = StandaloneManifestIndex.load(runtmp.output('mf.csv')) + idx = StandaloneManifestIndex.load(runtmp.output("mf.csv")) siglist = list(idx.signatures()) assert len(siglist) == 1 @@ -2013,18 +2046,19 @@ def test_standalone_manifest_lazy_load(runtmp): def test_standalone_manifest_lazy_load_2_prefix(runtmp): # check that it's actually doing lazy loading; supply explicit prefix - orig_sig47 = utils.get_test_data('47.fa.sig') - sig47 = runtmp.output('47.fa.sig') + orig_sig47 = utils.get_test_data("47.fa.sig") + sig47 = runtmp.output("47.fa.sig") # build an external manifest # note, here use a relative path to 47.fa.sig; the manifest will contain # just '47.fa.sig' as the location shutil.copyfile(orig_sig47, sig47) - runtmp.sourmash('sig', 'manifest', '47.fa.sig', '-o', 'mf.csv') + runtmp.sourmash("sig", "manifest", "47.fa.sig", "-o", "mf.csv") # should work to get signatures: - idx = StandaloneManifestIndex.load(runtmp.output('mf.csv'), - prefix=runtmp.output('')) + idx = StandaloneManifestIndex.load( + runtmp.output("mf.csv"), prefix=runtmp.output("") + ) siglist = list(idx.signatures()) assert len(siglist) == 1 @@ -2047,68 +2081,68 @@ def test_standalone_manifest_lazy_load_2_prefix(runtmp): def test_standalone_manifest_search(runtmp): # test a straight up 'search' - query_sig = utils.get_test_data('scaled/genome-s12.fa.gz.sig') - mf = utils.get_test_data('scaled/mf.csv') + query_sig = utils.get_test_data("scaled/genome-s12.fa.gz.sig") + mf = utils.get_test_data("scaled/mf.csv") - runtmp.sourmash('search', query_sig, mf) + runtmp.sourmash("search", query_sig, mf) out = runtmp.last_result.out print(out) - assert '100.0% d84ef28f' in out + assert "100.0% d84ef28f" in out def test_standalone_manifest_prefetch_lazy(runtmp): # check that prefetch is actually doing lazy loading on manifest index. 
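The lazy-load tests above and the prefetch test just below share one recipe: generate a manifest with `sourmash sig manifest`, then point StandaloneManifestIndex.load() at it, using prefix= to rebase relative locations. A sketch of the recipe (mf.csv and some_dir/ are hypothetical placeholders):

    from sourmash.index import StandaloneManifestIndex

    # assumes mf.csv was written by `sourmash sig manifest <sig> -o mf.csv`
    # with locations relative to some_dir/
    idx = StandaloneManifestIndex.load("mf.csv", prefix="some_dir/")

    # signatures are only read from disk as this iterates
    for ss in idx.signatures():
        print(ss.name)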
- orig_sig47 = utils.get_test_data('47.fa.sig') - sig47 = runtmp.output('47.fa.sig') - orig_sig2 = utils.get_test_data('2.fa.sig') - sig2 = runtmp.output('2.fa.sig') - orig_sig63 = utils.get_test_data('63.fa.sig') - sig63 = runtmp.output('63.fa.sig') + orig_sig47 = utils.get_test_data("47.fa.sig") + sig47 = runtmp.output("47.fa.sig") + orig_sig2 = utils.get_test_data("2.fa.sig") + sig2 = runtmp.output("2.fa.sig") + orig_sig63 = utils.get_test_data("63.fa.sig") + sig63 = runtmp.output("63.fa.sig") shutil.copyfile(orig_sig47, sig47) - runtmp.sourmash('sig', 'manifest', sig47, '-o', 'mf1.csv') + runtmp.sourmash("sig", "manifest", sig47, "-o", "mf1.csv") shutil.copyfile(orig_sig2, sig2) - runtmp.sourmash('sig', 'manifest', sig2, '-o', 'mf2.csv') + runtmp.sourmash("sig", "manifest", sig2, "-o", "mf2.csv") shutil.copyfile(orig_sig63, sig63) - runtmp.sourmash('sig', 'manifest', sig63, '-o', 'mf3.csv') + runtmp.sourmash("sig", "manifest", sig63, "-o", "mf3.csv") # combine the manifests, manually for now... - mf1 = CollectionManifest.load_from_filename(runtmp.output('mf1.csv')) + mf1 = CollectionManifest.load_from_filename(runtmp.output("mf1.csv")) assert len(mf1) == 1 - mf2 = CollectionManifest.load_from_filename(runtmp.output('mf2.csv')) + mf2 = CollectionManifest.load_from_filename(runtmp.output("mf2.csv")) assert len(mf2) == 3 - mf3 = CollectionManifest.load_from_filename(runtmp.output('mf3.csv')) + mf3 = CollectionManifest.load_from_filename(runtmp.output("mf3.csv")) assert len(mf3) == 1 mf = mf1 + mf2 + mf3 assert len(mf) == 5 - mf.write_to_filename(runtmp.output('mf.csv')) + mf.write_to_filename(runtmp.output("mf.csv")) # ok! now, remove the last signature, 'sig63'. os.unlink(sig63) # ...but loading the manifest should still work. - idx = StandaloneManifestIndex.load(runtmp.output('mf.csv')) + idx = StandaloneManifestIndex.load(runtmp.output("mf.csv")) # double check - third load will fail. this relies on load order :shrug:. sig_iter = iter(idx.signatures()) ss = next(sig_iter) print(ss) - assert '47.fa' in ss.filename + assert "47.fa" in ss.filename for i in range(3): ss = next(sig_iter) print(i, ss) - assert '2.fa' in ss.filename + assert "2.fa" in ss.filename with pytest.raises(ValueError) as exc: ss = next(sig_iter) - assert 'Error while reading signatures from' in str(exc) - assert '63.fa.sig' in str(exc) + assert "Error while reading signatures from" in str(exc) + assert "63.fa.sig" in str(exc) # ok! now test prefetch... should get one match legit, to 47, # and then no matches to 2, and then error. 
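One detail in the prefetch test above that is easy to miss: CollectionManifest supports +, so per-file manifests can be concatenated and written back out as a single standalone manifest. A sketch, with the same hypothetical filenames as the test:

    from sourmash.manifest import CollectionManifest

    mf1 = CollectionManifest.load_from_filename("mf1.csv")
    mf2 = CollectionManifest.load_from_filename("mf2.csv")

    mf = mf1 + mf2                       # row-wise concatenation
    assert len(mf) == len(mf1) + len(mf2)
    mf.write_to_filename("mf.csv")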
@@ -2125,5 +2159,5 @@ def test_standalone_manifest_prefetch_lazy(runtmp): with pytest.raises(ValueError) as exc: sr = next(g) - assert 'Error while reading signatures from' in str(exc) - assert '63.fa.sig' in str(exc) + assert "Error while reading signatures from" in str(exc) + assert "63.fa.sig" in str(exc) diff --git a/tests/test_index_protocol.py b/tests/test_index_protocol.py index 4a6672408e..b843e9883d 100644 --- a/tests/test_index_protocol.py +++ b/tests/test_index_protocol.py @@ -8,27 +8,30 @@ import sourmash from sourmash import SourmashSignature -from sourmash.index import (LinearIndex, ZipFileLinearIndex, - LazyLinearIndex, MultiIndex, - StandaloneManifestIndex, - IndexSearchResult) +from sourmash.index import ( + LinearIndex, + ZipFileLinearIndex, + LazyLinearIndex, + MultiIndex, + StandaloneManifestIndex, + IndexSearchResult, +) from sourmash.index import CounterGather from sourmash.index.sqlite_index import SqliteIndex from sourmash.index.revindex import RevIndex from sourmash.sbt import SBT, GraphFactory from sourmash.manifest import CollectionManifest, BaseCollectionManifest from sourmash.lca.lca_db import LCA_Database, load_single_database -from sourmash.minhash import (flatten_and_intersect_scaled, - flatten_and_downsample_scaled) +from sourmash.minhash import flatten_and_intersect_scaled, flatten_and_downsample_scaled import sourmash_tst_utils as utils def _load_three_sigs(): # utility function - load & return these three sigs. - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47) @@ -55,7 +58,7 @@ def build_lazy_linear_index(runtmp): def build_sbt_index(runtmp): ss2, ss47, ss63 = _load_three_sigs() - + factory = GraphFactory(5, 100, 3) root = SBT(factory, d=2) @@ -68,7 +71,7 @@ def build_sbt_index(runtmp): def build_sbt_index_save_load(runtmp): root = build_sbt_index(runtmp) - out = runtmp.output('xyz.sbt.zip') + out = runtmp.output("xyz.sbt.zip") root.save(out) return sourmash.load_file_as_index(out) @@ -77,7 +80,7 @@ def build_sbt_index_save_load(runtmp): def build_zipfile_index(runtmp): from sourmash.save_load import SaveSignatures_ZipFile - location = runtmp.output('index.zip') + location = runtmp.output("index.zip") with SaveSignatures_ZipFile(location) as save_sigs: for ss in _load_three_sigs(): save_sigs.add(ss) @@ -95,9 +98,9 @@ def build_multi_index(runtmp): def build_standalone_manifest_index(runtmp): - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47) @@ -106,10 +109,10 @@ def build_standalone_manifest_index(runtmp): siglist = [(ss2, sig2), (ss47, sig47), (ss63, sig63)] rows = [] - rows.extend((CollectionManifest.make_manifest_row(ss, loc) for ss, loc in siglist )) + rows.extend((CollectionManifest.make_manifest_row(ss, loc) for ss, loc in siglist)) mf = CollectionManifest(rows) mf_filename = runtmp.output("mf.csv") - + mf.write_to_filename(mf_filename) idx = StandaloneManifestIndex.load(mf_filename) @@ -118,7 +121,7 @@ def build_standalone_manifest_index(runtmp): def 
build_lca_index(runtmp): siglist = _load_three_sigs() - db = LCA_Database(31, 1000, 'DNA') + db = LCA_Database(31, 1000, "DNA") for ss in siglist: db.insert(ss) @@ -127,14 +130,14 @@ def build_lca_index(runtmp): def build_lca_index_save_load(runtmp): db = build_lca_index(runtmp) - outfile = runtmp.output('db.lca.json') + outfile = runtmp.output("db.lca.json") db.save(outfile) return sourmash.load_file_as_index(outfile) def build_sqlite_index(runtmp): - filename = runtmp.output('idx.sqldb') + filename = runtmp.output("idx.sqldb") db = SqliteIndex.create(filename) siglist = _load_three_sigs() @@ -157,8 +160,8 @@ def build_revindex(runtmp): def build_lca_index_save_load_sql(runtmp): db = build_lca_index(runtmp) - outfile = runtmp.output('db.lca.json') - db.save(outfile, format='sql') + outfile = runtmp.output("db.lca.json") + db.save(outfile, format="sql") x = load_single_database(outfile) db_load = x[0] @@ -171,19 +174,22 @@ def build_lca_index_save_load_sql(runtmp): # building functions. # -@pytest.fixture(params=[build_linear_index, - build_lazy_linear_index, - build_sbt_index, - build_zipfile_index, - build_multi_index, - build_standalone_manifest_index, - build_lca_index, - build_sbt_index_save_load, - build_lca_index_save_load, - build_sqlite_index, - build_lca_index_save_load_sql, -# build_revindex, - ] + +@pytest.fixture( + params=[ + build_linear_index, + build_lazy_linear_index, + build_sbt_index, + build_zipfile_index, + build_multi_index, + build_standalone_manifest_index, + build_lca_index, + build_sbt_index_save_load, + build_lca_index_save_load, + build_sqlite_index, + build_lca_index_save_load_sql, + # build_revindex, + ] ) def index_obj(request, runtmp): build_fn = request.param @@ -271,7 +277,7 @@ def test_index_signatures(index_obj): assert len(siglist) == 3 # check md5sums, since 'in' doesn't always work - md5s = set(( ss.md5sum() for ss in siglist )) + md5s = set(ss.md5sum() for ss in siglist) assert ss2.md5sum() in md5s assert ss47.md5sum() in md5s assert ss63.md5sum() in md5s @@ -285,7 +291,7 @@ def test_index_signatures_with_location(index_obj): assert len(siglist) == 3 # check md5sums, since 'in' doesn't always work - md5s = set(( ss.md5sum() for ss, loc in siglist )) + md5s = set((ss.md5sum() for ss, loc in siglist)) assert ss2.md5sum() in md5s assert ss47.md5sum() in md5s assert ss63.md5sum() in md5s @@ -315,15 +321,22 @@ def test_index_manifest(index_obj): def test_index_select_basic(index_obj): # select does the basic thing ok - idx = index_obj.select(ksize=31, moltype='DNA', abund=False, - containment=True, scaled=1000, num=0, picklist=None) + idx = index_obj.select( + ksize=31, + moltype="DNA", + abund=False, + containment=True, + scaled=1000, + num=0, + picklist=None, + ) assert len(idx) == 3 siglist = list(idx.signatures()) assert len(siglist) == 3 # check md5sums, since 'in' doesn't always work - md5s = set(( ss.md5sum() for ss in siglist )) + md5s = set(ss.md5sum() for ss in siglist) ss2, ss47, ss63 = _load_three_sigs() assert ss2.md5sum() in md5s assert ss47.md5sum() in md5s @@ -477,6 +490,7 @@ class CounterGather_LinearIndex: Provides an (inefficient) CounterGather-style class, for protocol testing purposes. """ + def __init__(self, orig_query): "Constructor - take a SourmashSignature that is the original query." orig_query_mh = orig_query.minhash @@ -564,6 +578,7 @@ class CounterGather_LCA: based on LCA_Database. This is currently just for protocol and API testing purposes. 
""" + def __init__(self, query): from sourmash.lca.lca_db import LCA_Database @@ -572,8 +587,7 @@ def __init__(self, query): raise ValueError("must use scaled MinHash") self.orig_query_mh = query_mh - lca_db = LCA_Database(query_mh.ksize, query_mh.scaled, - query_mh.moltype) + lca_db = LCA_Database(query_mh.ksize, query_mh.scaled, query_mh.moltype) self.db = lca_db self.siglist = {} self.locations = {} @@ -598,8 +612,7 @@ def add(self, ss, *, location=None, require_overlap=True): def signatures(self): "Yield all signatures." - for ss in self.siglist.values(): - yield ss + yield from self.siglist.values() def downsample(self, scaled): "Track highest scaled across all possible matches." @@ -635,8 +648,7 @@ def peek(self, query_mh, *, threshold_bp=0): cont = result.score match = result.signature - intersect_mh = flatten_and_intersect_scaled(result.signature.minhash, - query_mh) + intersect_mh = flatten_and_intersect_scaled(result.signature.minhash, query_mh) md5 = result.signature.md5sum() location = self.locations[md5] @@ -648,10 +660,12 @@ def consume(self, intersect_mh): self.query_started = 1 -@pytest.fixture(params=[CounterGather, - CounterGather_LinearIndex, - CounterGather_LCA, - ] +@pytest.fixture( + params=[ + CounterGather, + CounterGather_LinearIndex, + CounterGather_LCA, + ] ) def counter_gather_constructor(request): build_fn = request.param @@ -664,19 +678,19 @@ def test_counter_get_signatures(counter_gather_constructor): # test .signatures() method query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") match_mh_1 = query_mh.copy_and_clear() match_mh_1.add_many(range(0, 10)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") match_mh_2 = query_mh.copy_and_clear() match_mh_2.add_many(range(10, 15)) - match_ss_2 = SourmashSignature(match_mh_2, name='match2') + match_ss_2 = SourmashSignature(match_mh_2, name="match2") match_mh_3 = query_mh.copy_and_clear() match_mh_3.add_many(range(15, 17)) - match_ss_3 = SourmashSignature(match_mh_3, name='match3') + match_ss_3 = SourmashSignature(match_mh_3, name="match3") counter = counter_gather_constructor(query_ss) counter.add(match_ss_1) @@ -720,19 +734,19 @@ def test_counter_gather_1(counter_gather_constructor): # generated via CounterGather query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") match_mh_1 = query_mh.copy_and_clear() match_mh_1.add_many(range(0, 10)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") match_mh_2 = query_mh.copy_and_clear() match_mh_2.add_many(range(10, 15)) - match_ss_2 = SourmashSignature(match_mh_2, name='match2') + match_ss_2 = SourmashSignature(match_mh_2, name="match2") match_mh_3 = query_mh.copy_and_clear() match_mh_3.add_many(range(15, 17)) - match_ss_3 = SourmashSignature(match_mh_3, name='match3') + match_ss_3 = SourmashSignature(match_mh_3, name="match3") # load up the counter counter = counter_gather_constructor(query_ss) @@ -742,9 +756,11 @@ def test_counter_gather_1(counter_gather_constructor): results = _consume_all(query_ss.minhash, counter) - expected = (['match1', 10], - ['match2', 5], - ['match3', 2],) + expected = ( + ["match1", 10], + ["match2", 5], + ["match3", 2], + ) assert 
len(results) == len(expected), results for (sr, size), (exp_name, exp_size) in zip(results, expected): @@ -762,19 +778,19 @@ def test_counter_gather_1_b(counter_gather_constructor): # larger. query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") match_mh_1 = query_mh.copy_and_clear() match_mh_1.add_many(range(0, 10)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") match_mh_2 = query_mh.copy_and_clear() match_mh_2.add_many(range(7, 15)) - match_ss_2 = SourmashSignature(match_mh_2, name='match2') + match_ss_2 = SourmashSignature(match_mh_2, name="match2") match_mh_3 = query_mh.copy_and_clear() match_mh_3.add_many(range(13, 17)) - match_ss_3 = SourmashSignature(match_mh_3, name='match3') + match_ss_3 = SourmashSignature(match_mh_3, name="match3") # load up the counter counter = counter_gather_constructor(query_ss) @@ -784,9 +800,11 @@ def test_counter_gather_1_b(counter_gather_constructor): results = _consume_all(query_ss.minhash, counter) - expected = (['match1', 10], - ['match2', 5], - ['match3', 2],) + expected = ( + ["match1", 10], + ["match2", 5], + ["match3", 2], + ) assert len(results) == len(expected), results for (sr, size), (exp_name, exp_size) in zip(results, expected): @@ -806,19 +824,19 @@ def test_counter_gather_1_c_with_threshold(counter_gather_constructor): query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") match_mh_1 = query_mh.copy_and_clear() match_mh_1.add_many(range(0, 10)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") match_mh_2 = query_mh.copy_and_clear() match_mh_2.add_many(range(7, 15)) - match_ss_2 = SourmashSignature(match_mh_2, name='match2') + match_ss_2 = SourmashSignature(match_mh_2, name="match2") match_mh_3 = query_mh.copy_and_clear() match_mh_3.add_many(range(13, 17)) - match_ss_3 = SourmashSignature(match_mh_3, name='match3') + match_ss_3 = SourmashSignature(match_mh_3, name="match3") # load up the counter counter = counter_gather_constructor(query_ss) @@ -826,11 +844,9 @@ def test_counter_gather_1_c_with_threshold(counter_gather_constructor): counter.add(match_ss_2) counter.add(match_ss_3) - results = _consume_all(query_ss.minhash, counter, - threshold_bp=3) + results = _consume_all(query_ss.minhash, counter, threshold_bp=3) - expected = (['match1', 10], - ['match2', 5]) + expected = (["match1", 10], ["match2", 5]) assert len(results) == len(expected), results for (sr, size), (exp_name, exp_size) in zip(results, expected): @@ -844,19 +860,19 @@ def test_counter_gather_1_d_diff_scaled(counter_gather_constructor): # test as above, but with different scaled. 
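The with-threshold variant above is worth a second look: with scaled=1 there is one hash per position, so threshold_bp translates directly into a minimum overlap in hashes, which is why threshold_bp=3 silently drops the 2-hash match. A self-contained illustration:

    import sourmash
    from sourmash import SourmashSignature
    from sourmash.index import CounterGather

    query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1)
    query_mh.add_many(range(0, 20))
    query_ss = SourmashSignature(query_mh, name="query")

    small_mh = query_mh.copy_and_clear()
    small_mh.add_many(range(0, 2))          # only 2 hashes of overlap

    counter = CounterGather(query_ss)
    counter.add(SourmashSignature(small_mh, name="small"))

    # scaled=1: threshold_bp=3 demands at least 3 overlapping hashes
    assert counter.peek(query_ss.minhash, threshold_bp=3) == []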
query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") match_mh_1 = query_mh.copy_and_clear().downsample(scaled=10) match_mh_1.add_many(range(0, 10)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") match_mh_2 = query_mh.copy_and_clear().downsample(scaled=20) match_mh_2.add_many(range(7, 15)) - match_ss_2 = SourmashSignature(match_mh_2, name='match2') + match_ss_2 = SourmashSignature(match_mh_2, name="match2") match_mh_3 = query_mh.copy_and_clear().downsample(scaled=30) match_mh_3.add_many(range(13, 17)) - match_ss_3 = SourmashSignature(match_mh_3, name='match3') + match_ss_3 = SourmashSignature(match_mh_3, name="match3") # load up the counter counter = counter_gather_constructor(query_ss) @@ -866,9 +882,11 @@ def test_counter_gather_1_d_diff_scaled(counter_gather_constructor): results = _consume_all(query_ss.minhash, counter) - expected = (['match1', 10], - ['match2', 5], - ['match3', 2],) + expected = ( + ["match1", 10], + ["match2", 5], + ["match3", 2], + ) assert len(results) == len(expected), results for (sr, size), (exp_name, exp_size) in zip(results, expected): @@ -885,18 +903,18 @@ def test_counter_gather_1_d_diff_scaled_query(counter_gather_constructor): match_mh_1 = query_mh.copy_and_clear().downsample(scaled=10) match_mh_1.add_many(range(0, 10)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") match_mh_2 = query_mh.copy_and_clear().downsample(scaled=20) match_mh_2.add_many(range(7, 15)) - match_ss_2 = SourmashSignature(match_mh_2, name='match2') + match_ss_2 = SourmashSignature(match_mh_2, name="match2") match_mh_3 = query_mh.copy_and_clear().downsample(scaled=30) match_mh_3.add_many(range(13, 17)) - match_ss_3 = SourmashSignature(match_mh_3, name='match3') + match_ss_3 = SourmashSignature(match_mh_3, name="match3") # downsample query now - - query_ss = SourmashSignature(query_mh.downsample(scaled=100), name='query') + query_ss = SourmashSignature(query_mh.downsample(scaled=100), name="query") # load up the counter counter = counter_gather_constructor(query_ss) @@ -906,9 +924,11 @@ def test_counter_gather_1_d_diff_scaled_query(counter_gather_constructor): results = _consume_all(query_ss.minhash, counter) - expected = (['match1', 10], - ['match2', 5], - ['match3', 2],) + expected = ( + ["match1", 10], + ["match2", 5], + ["match3", 2], + ) assert len(results) == len(expected), results for (sr, size), (exp_name, exp_size) in zip(results, expected): @@ -922,19 +942,19 @@ def test_counter_gather_1_e_abund_query(counter_gather_constructor): # test as above, but abund query query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1, track_abundance=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") match_mh_1 = query_mh.copy_and_clear().flatten() match_mh_1.add_many(range(0, 10)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") match_mh_2 = query_mh.copy_and_clear().flatten() match_mh_2.add_many(range(7, 15)) - match_ss_2 = SourmashSignature(match_mh_2, name='match2') + match_ss_2 = SourmashSignature(match_mh_2, name="match2") match_mh_3 = query_mh.copy_and_clear().flatten() match_mh_3.add_many(range(13, 17)) - match_ss_3 = SourmashSignature(match_mh_3, 
name='match3') + match_ss_3 = SourmashSignature(match_mh_3, name="match3") # load up the counter counter = counter_gather_constructor(query_ss) @@ -945,9 +965,11 @@ def test_counter_gather_1_e_abund_query(counter_gather_constructor): # must flatten before peek! results = _consume_all(query_ss.minhash.flatten(), counter) - expected = (['match1', 10], - ['match2', 5], - ['match3', 2],) + expected = ( + ["match1", 10], + ["match2", 5], + ["match3", 2], + ) assert len(results) == len(expected), results for (sr, size), (exp_name, exp_size) in zip(results, expected): @@ -961,19 +983,19 @@ def test_counter_gather_1_f_abund_match(counter_gather_constructor): # test as above, but abund query query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1, track_abundance=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh.flatten(), name='query') + query_ss = SourmashSignature(query_mh.flatten(), name="query") match_mh_1 = query_mh.copy_and_clear() match_mh_1.add_many(range(0, 10)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") match_mh_2 = query_mh.copy_and_clear() match_mh_2.add_many(range(7, 15)) - match_ss_2 = SourmashSignature(match_mh_2, name='match2') + match_ss_2 = SourmashSignature(match_mh_2, name="match2") match_mh_3 = query_mh.copy_and_clear() match_mh_3.add_many(range(13, 17)) - match_ss_3 = SourmashSignature(match_mh_3, name='match3') + match_ss_3 = SourmashSignature(match_mh_3, name="match3") # load up the counter counter = counter_gather_constructor(query_ss) @@ -984,9 +1006,11 @@ def test_counter_gather_1_f_abund_match(counter_gather_constructor): # must flatten before peek! results = _consume_all(query_ss.minhash.flatten(), counter) - expected = (['match1', 10], - ['match2', 5], - ['match3', 2],) + expected = ( + ["match1", 10], + ["match2", 5], + ["match3", 2], + ) assert len(results) == len(expected), results for (sr, size), (exp_name, exp_size) in zip(results, expected): @@ -999,13 +1023,14 @@ def test_counter_gather_1_f_abund_match(counter_gather_constructor): def test_counter_gather_2(counter_gather_constructor): # check basic set of gather results on semi-real data, # generated via CounterGather - testdata_combined = utils.get_test_data('gather/combined.sig') - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_combined = utils.get_test_data("gather/combined.sig") + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) query_ss = sourmash.load_one_signature(testdata_combined, ksize=21) - subject_sigs = [ (sourmash.load_one_signature(t, ksize=21), t) - for t in testdata_sigs ] + subject_sigs = [ + (sourmash.load_one_signature(t, ksize=21), t) for t in testdata_sigs + ] # load up the counter counter = counter_gather_constructor(query_ss) @@ -1014,18 +1039,20 @@ def test_counter_gather_2(counter_gather_constructor): results = _consume_all(query_ss.minhash, counter) - expected = (['NC_003198.1', 487], - ['NC_000853.1', 192], - ['NC_011978.1', 169], - ['NC_002163.1', 157], - ['NC_003197.2', 152], - ['NC_009486.1', 92], - ['NC_006905.1', 76], - ['NC_011080.1', 59], - ['NC_011274.1', 42], - ['NC_006511.1', 31], - ['NC_011294.1', 7], - ['NC_004631.1', 2]) + expected = ( + ["NC_003198.1", 487], + ["NC_000853.1", 192], + ["NC_011978.1", 169], + ["NC_002163.1", 157], + ["NC_003197.2", 152], + ["NC_009486.1", 92], + ["NC_006905.1", 76], + ["NC_011080.1", 59], + ["NC_011274.1", 42], + ["NC_006511.1", 31], + ["NC_011294.1", 7], + ["NC_004631.1", 2], 
+ ) assert len(results) == len(expected) for (sr, size), (exp_name, exp_size) in zip(results, expected): @@ -1040,11 +1067,11 @@ def test_counter_gather_exact_match(counter_gather_constructor): # query == match query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") # load up the counter; provide a location override, too. counter = counter_gather_constructor(query_ss) - counter.add(query_ss, location='somewhere over the rainbow') + counter.add(query_ss, location="somewhere over the rainbow") results = _consume_all(query_ss.minhash, counter) assert len(results) == 1 @@ -1052,14 +1079,14 @@ def test_counter_gather_exact_match(counter_gather_constructor): assert sr.score == 1.0 assert sr.signature == query_ss - assert sr.location == 'somewhere over the rainbow' + assert sr.location == "somewhere over the rainbow" def test_counter_gather_multiple_identical_matches(counter_gather_constructor): # test multiple identical matches being inserted, with only one return query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") # create counter... counter = counter_gather_constructor(query_ss) @@ -1068,7 +1095,7 @@ def test_counter_gather_multiple_identical_matches(counter_gather_constructor): match_mh = query_mh.copy_and_clear() match_mh.add_many(range(5, 15)) - for name in 'match1', 'match2', 'match3': + for name in "match1", "match2", "match3": match_ss = SourmashSignature(match_mh, name=name) counter.add(match_ss, location=name) @@ -1080,18 +1107,18 @@ def test_counter_gather_multiple_identical_matches(counter_gather_constructor): assert overlap_count == 10 # any one of the three is valid - assert sr.location in ('match1', 'match2', 'match3') + assert sr.location in ("match1", "match2", "match3") def test_counter_gather_add_after_peek(counter_gather_constructor): # cannot add after peek or consume query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") # load up the counter counter = counter_gather_constructor(query_ss) - counter.add(query_ss, location='somewhere over the rainbow') + counter.add(query_ss, location="somewhere over the rainbow") counter.peek(query_ss.minhash) @@ -1103,11 +1130,11 @@ def test_counter_gather_add_after_consume(counter_gather_constructor): # cannot add after peek or consume query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") # load up the counter counter = counter_gather_constructor(query_ss) - counter.add(query_ss, location='somewhere over the rainbow') + counter.add(query_ss, location="somewhere over the rainbow") counter.consume(query_ss.minhash) @@ -1119,11 +1146,11 @@ def test_counter_gather_consume_empty_intersect(counter_gather_constructor): # check that consume works fine when there is an empty signature. 
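The two guard-rail tests above pin down the CounterGather state machine: once peek() or consume() has been called, further add() calls must raise ValueError. Distilled:

    import pytest
    import sourmash
    from sourmash import SourmashSignature
    from sourmash.index import CounterGather

    query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1)
    query_mh.add_many(range(0, 20))
    query_ss = SourmashSignature(query_mh, name="query")

    counter = CounterGather(query_ss)
    counter.add(query_ss)

    counter.peek(query_ss.minhash)          # gather has now started...
    with pytest.raises(ValueError):
        counter.add(query_ss)               # ...so this is too late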
query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") # load up the counter counter = counter_gather_constructor(query_ss) - counter.add(query_ss, location='somewhere over the rainbow') + counter.add(query_ss, location="somewhere over the rainbow") # nothing really happens here :laugh:, just making sure there's no error counter.consume(query_ss.minhash.copy_and_clear()) @@ -1132,11 +1159,11 @@ def test_counter_gather_consume_empty_intersect(counter_gather_constructor): def test_counter_gather_empty_initial_query(counter_gather_constructor): # check empty initial query query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") match_mh_1 = query_mh.copy_and_clear() match_mh_1.add_many(range(0, 10)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") # load up the counter counter = counter_gather_constructor(query_ss) @@ -1149,7 +1176,7 @@ def test_counter_gather_num_query(counter_gather_constructor): # check num query query_mh = sourmash.MinHash(n=500, ksize=31) query_mh.add_many(range(0, 10)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") with pytest.raises(ValueError): counter_gather_constructor(query_ss) @@ -1159,11 +1186,11 @@ def test_counter_gather_empty_cur_query(counter_gather_constructor): # test empty cur query query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") # load up the counter counter = counter_gather_constructor(query_ss) - counter.add(query_ss, location='somewhere over the rainbow') + counter.add(query_ss, location="somewhere over the rainbow") cur_query_mh = query_ss.minhash.copy_and_clear() results = _consume_all(cur_query_mh, counter) @@ -1174,27 +1201,27 @@ def test_counter_gather_add_num_matchy(counter_gather_constructor): # test add num query query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") match_mh = sourmash.MinHash(n=500, ksize=31) match_mh.add_many(range(0, 20)) - match_ss = SourmashSignature(match_mh, name='query') + match_ss = SourmashSignature(match_mh, name="query") # load up the counter counter = counter_gather_constructor(query_ss) with pytest.raises(ValueError): - counter.add(match_ss, location='somewhere over the rainbow') + counter.add(match_ss, location="somewhere over the rainbow") def test_counter_gather_bad_cur_query(counter_gather_constructor): # test cur query that is not subset of original query query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") # load up the counter counter = counter_gather_constructor(query_ss) - counter.add(query_ss, location='somewhere over the rainbow') + counter.add(query_ss, location="somewhere over the rainbow") cur_query_mh = query_ss.minhash.copy_and_clear() cur_query_mh.add_many(range(20, 30)) @@ -1206,11 +1233,11 @@ def test_counter_gather_add_no_overlap(counter_gather_constructor): # check adding match with no overlap 
w/query query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 10)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") match_mh_1 = query_mh.copy_and_clear() match_mh_1.add_many(range(10, 20)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") # load up the counter counter = counter_gather_constructor(query_ss) @@ -1224,18 +1251,18 @@ def test_counter_gather_big_threshold(counter_gather_constructor): # check 'peek' with a huge threshold query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") match_mh_1 = query_mh.copy_and_clear() match_mh_1.add_many(range(0, 10)) - match_ss_1 = SourmashSignature(match_mh_1, name='match1') + match_ss_1 = SourmashSignature(match_mh_1, name="match1") # load up the counter counter = counter_gather_constructor(query_ss) counter.add(match_ss_1) # impossible threshold: - threshold_bp=30*query_ss.minhash.scaled + threshold_bp = 30 * query_ss.minhash.scaled results = counter.peek(query_ss.minhash, threshold_bp=threshold_bp) assert results == [] @@ -1243,7 +1270,7 @@ def test_counter_gather_big_threshold(counter_gather_constructor): def test_counter_gather_empty_counter(counter_gather_constructor): # check empty counter query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) - query_ss = SourmashSignature(query_mh, name='query') + query_ss = SourmashSignature(query_mh, name="query") # empty counter! counter = counter_gather_constructor(query_ss) diff --git a/tests/test_jaccard.py b/tests/test_jaccard.py index ce0846a3ae..87093ee194 100644 --- a/tests/test_jaccard.py +++ b/tests/test_jaccard.py @@ -72,10 +72,10 @@ def test_dna_mh(track_abundance): e1 = MinHash(n=5, ksize=4, track_abundance=track_abundance) e2 = MinHash(n=5, ksize=4, track_abundance=track_abundance) - seq = 'ATGGCAGTGACGATGCCAG' + seq = "ATGGCAGTGACGATGCCAG" e1.add_sequence(seq) for i in range(len(seq) - 3): - e2.add_kmer(seq[i:i + 4]) + e2.add_kmer(seq[i : i + 4]) assert e1.hashes.keys() == e2.hashes.keys() print(e1.hashes.keys()) @@ -84,19 +84,17 @@ def test_dna_mh(track_abundance): def test_protein_mh(track_abundance): - e1 = MinHash(n=5, ksize=2, is_protein=True, - track_abundance=track_abundance) - e2 = MinHash(n=5, ksize=2, is_protein=True, - track_abundance=track_abundance) + e1 = MinHash(n=5, ksize=2, is_protein=True, track_abundance=track_abundance) + e2 = MinHash(n=5, ksize=2, is_protein=True, track_abundance=track_abundance) # ok, so this is confusing, but: we are adding _DNA_ kmers here, # and translating. so, add_sequence and add_kmer actually both add # 6-mers. 
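As the comment above explains, a protein MinHash with ksize=2 consumes DNA 6-mers (three nucleotides per amino acid), so add_sequence() and add_kmer() agree on 6-character windows. Distilled from the test that follows:

    from sourmash import MinHash

    e1 = MinHash(n=5, ksize=2, is_protein=True)
    e2 = MinHash(n=5, ksize=2, is_protein=True)

    seq = "ATGGCAGTGACGATGCCG"
    e1.add_sequence(seq)                 # translates, hashes 2-aa k-mers
    for i in range(len(seq) - 5):
        e2.add_kmer(seq[i : i + 6])      # same 6-nt windows, added directly

    assert e1.hashes.keys() == e2.hashes.keys()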
- seq = 'ATGGCAGTGACGATGCCG' + seq = "ATGGCAGTGACGATGCCG" e1.add_sequence(seq) for i in range(len(seq) - 5): - kmer = seq[i:i + 6] + kmer = seq[i : i + 6] e2.add_kmer(kmer) assert e1.hashes.keys() == e2.hashes.keys() @@ -107,10 +105,9 @@ def test_pickle(track_abundance): import pickle from io import BytesIO - e1 = MinHash(n=5, ksize=6, is_protein=False, - track_abundance=track_abundance) + e1 = MinHash(n=5, ksize=6, is_protein=False, track_abundance=track_abundance) - seq = 'ATGGCAGTGACGATGCCG' + seq = "ATGGCAGTGACGATGCCG" e1.add_sequence(seq) e1.add_sequence(seq) @@ -131,8 +128,7 @@ def test_pickle(track_abundance): def test_bad_construct_1(track_abundance): try: - e1 = MinHash(ksize=6, is_protein=False, - track_abundance=track_abundance) + MinHash(ksize=6, is_protein=False, track_abundance=track_abundance) assert 0, "require n in constructor" except TypeError: pass @@ -140,8 +136,7 @@ def test_bad_construct_1(track_abundance): def test_bad_construct_2(track_abundance): try: - e1 = MinHash(n=100, is_protein=False, - track_abundance=track_abundance) + MinHash(n=100, is_protein=False, track_abundance=track_abundance) assert 0, "require ksize in constructor" except TypeError: pass @@ -175,15 +170,16 @@ def test_abund_similarity_zero(): #### + def test_jaccard_on_real_data(): from sourmash.signature import load_signatures - afile = 'n10000/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz' + afile = "n10000/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz" a = utils.get_test_data(afile) sig1 = list(load_signatures(a))[0] mh1 = sig1.minhash - bfile = 'n10000/GCF_000006945.1_ASM694v1_genomic.fna.gz.sig.gz' + bfile = "n10000/GCF_000006945.1_ASM694v1_genomic.fna.gz.sig.gz" b = utils.get_test_data(bfile) sig2 = list(load_signatures(b))[0] mh2 = sig2.minhash @@ -210,12 +206,12 @@ def test_jaccard_on_real_data(): def test_scaled_on_real_data(): from sourmash.signature import load_signatures - afile = 'scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz' + afile = "scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz" a = utils.get_test_data(afile) sig1 = list(load_signatures(a))[0] mh1 = sig1.minhash - bfile = 'scaled100/GCF_000006945.1_ASM694v1_genomic.fna.gz.sig.gz' + bfile = "scaled100/GCF_000006945.1_ASM694v1_genomic.fna.gz.sig.gz" b = utils.get_test_data(bfile) sig2 = list(load_signatures(b))[0] mh2 = sig2.minhash @@ -243,12 +239,12 @@ def test_scaled_on_real_data(): def test_scaled_on_real_data_2(): from sourmash.signature import load_signatures - afile = 'scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz' + afile = "scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz" a = utils.get_test_data(afile) sig1 = list(load_signatures(a))[0] mh1 = sig1.minhash - bfile = 'scaled100/GCF_000006945.1_ASM694v1_genomic.fna.gz.sig.gz' + bfile = "scaled100/GCF_000006945.1_ASM694v1_genomic.fna.gz.sig.gz" b = utils.get_test_data(bfile) sig2 = list(load_signatures(b))[0] mh2 = sig2.minhash @@ -276,12 +272,12 @@ def test_scaled_on_real_data_2(): def test_downsample_scaled_with_num(): from sourmash.signature import load_signatures - afile = 'scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz' + afile = "scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz" a = utils.get_test_data(afile) sig1 = list(load_signatures(a))[0] mh1 = sig1.minhash with pytest.raises(ValueError) as exc: - mh = mh1.downsample(num=500) + mh1.downsample(num=500) - assert 'cannot downsample a scaled MinHash using num' in str(exc.value) + assert "cannot downsample a scaled MinHash using num" in str(exc.value) diff 
--git a/tests/test_lca.py b/tests/test_lca.py index 46b1d9716d..7db105628e 100644 --- a/tests/test_lca.py +++ b/tests/test_lca.py @@ -21,8 +21,7 @@ def test_api_create_search(): # create a database and then search for result. - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) assert len(lca_db) == 0 @@ -44,18 +43,16 @@ def test_api_create_search(): def test_api_find_picklist_select(): # does 'find' respect picklists? - sig47 = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) - sig63 = sourmash.load_one_signature(utils.get_test_data('63.fa.sig'), - ksize=31) + sig47 = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + sig63 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(sig47) lca_db.insert(sig63) # construct a picklist... - picklist = SignaturePicklist('md5prefix8') - picklist.init(['09a08691']) + picklist = SignaturePicklist("md5prefix8") + picklist.init(["09a08691"]) # run a 'find' with sig63, should find 47 and 63 both. search_obj = make_jaccard_search_query(do_containment=True, threshold=0.0) @@ -72,24 +69,22 @@ def test_api_find_picklist_select(): # and check that it is the expected one! ss = results[0].signature assert ss.minhash.ksize == 31 - assert ss.md5sum().startswith('09a08691c') + assert ss.md5sum().startswith("09a08691c") def test_api_find_picklist_select_exclude(): # does 'find' respect picklists? - sig47 = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) - sig63 = sourmash.load_one_signature(utils.get_test_data('63.fa.sig'), - ksize=31) + sig47 = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + sig63 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(sig47) lca_db.insert(sig63) # construct a picklist... - picklist = SignaturePicklist('md5prefix8', pickstyle= PickStyle.EXCLUDE) - picklist.init(['09a08691']) + picklist = SignaturePicklist("md5prefix8", pickstyle=PickStyle.EXCLUDE) + picklist.init(["09a08691"]) # run a 'find' with sig63, should find 47 and 63 both. search_obj = make_jaccard_search_query(do_containment=True, threshold=0.0) @@ -106,13 +101,12 @@ def test_api_find_picklist_select_exclude(): # and check that it is the expected one! ss = results[0].signature assert ss.minhash.ksize == 31 - assert ss.md5sum().startswith('38729c637') + assert ss.md5sum().startswith("38729c637") def test_api_create_insert(): # test some internal implementation stuff: create & then insert a sig. 
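The test_jaccard.py changes above are purely cosmetic (quote style, slice spacing, dropping unused bindings); the behavior they pin down can be reproduced standalone (assumes sourmash is installed):

    from sourmash import MinHash

    e1 = MinHash(n=5, ksize=4)
    e2 = MinHash(n=5, ksize=4)

    seq = "ATGGCAGTGACGATGCCAG"
    e1.add_sequence(seq)
    for i in range(len(seq) - 3):
        e2.add_kmer(seq[i : i + 4])  # ruff formats the slice with spaces

    # hand-rolled k-mer windows and add_sequence() hash identically
    assert e1.hashes.keys() == e2.hashes.keys()
    print(e1.jaccard(e2))  # 1.0

    # and, as test_downsample_scaled_with_num asserts, a scaled MinHash
    # cannot be downsampled with num=:
    mh = MinHash(n=0, ksize=31, scaled=1000)
    mh.add_many(range(0, 50))
    try:
        mh.downsample(num=500)
    except ValueError as exc:
        print(exc)  # cannot downsample a scaled MinHash using num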
- ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss) @@ -131,16 +125,15 @@ def test_api_create_insert(): for vv in lca_db._hashval_to_idx.values(): set_of_values.update(vv) assert len(set_of_values) == 1 - assert set_of_values == { 0 } + assert set_of_values == {0} - assert not lca_db._idx_to_lid # no lineage added - assert not lca_db._lid_to_lineage # no lineage added + assert not lca_db._idx_to_lid # no lineage added + assert not lca_db._lid_to_lineage # no lineage added def test_api_create_insert_bad_ksize(): # can we insert a ksize=21 signature into a ksize=31 DB? hopefully not. - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=21, scaled=1000) with pytest.raises(ValueError): @@ -149,17 +142,15 @@ def test_api_create_insert_bad_ksize(): def test_api_create_insert_bad_ident(): # can we insert a signature with no/empty ident? - ss1 = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) - ss2 = sourmash.load_one_signature(utils.get_test_data('63.fa.sig'), - ksize=31) + ss1 = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss2 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) ss1 = ss1.to_mutable() ss2 = ss2.to_mutable() - ss1.name = '' - ss1.filename = '' - ss2.name = '' - ss2.filename = '' + ss1.name = "" + ss1.filename = "" + ss2.name = "" + ss2.filename = "" lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss1) @@ -171,8 +162,7 @@ def test_api_create_insert_bad_ident(): def test_api_create_insert_bad_scaled(): # can we insert a scaled=1000 signature into a scaled=500 DB? # hopefully not. - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) assert ss.minhash.scaled == 1000 lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=500) @@ -183,11 +173,10 @@ def test_api_create_insert_bad_scaled(): def test_api_create_insert_bad_moltype(): # can we insert a DNAsignature into a protein DB? # hopefully not. - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) - assert ss.minhash.moltype == 'DNA' + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + assert ss.minhash.moltype == "DNA" - lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=500, moltype='protein') + lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=500, moltype="protein") with pytest.raises(ValueError): lca_db.insert(ss) @@ -195,13 +184,12 @@ def test_api_create_insert_bad_moltype(): def test_api_create_insert_ident(): # test some internal implementation stuff: signature inserted with # different ident than name. 
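The LCA_Database API exercised in these tests can be sketched end to end. A rough sketch only: the .sig paths are placeholders for the 47.fa.sig/63.fa.sig test data, and the two import paths are assumptions, not confirmed by this patch:

    import sourmash
    import sourmash.lca
    from sourmash.lca.lca_utils import LineagePair  # import path assumed
    from sourmash.picklist import SignaturePicklist, PickStyle  # import path assumed

    ss47 = sourmash.load_one_signature("47.fa.sig", ksize=31)  # placeholder path
    ss63 = sourmash.load_one_signature("63.fa.sig", ksize=31)  # placeholder path

    db = sourmash.lca.LCA_Database(ksize=31, scaled=1000)
    db.insert(ss47, ident="foo")
    # lineages are ordered tuples of (rank, name) pairs:
    db.insert(
        ss63,
        ident="bar",
        lineage=(LineagePair("rank1", "name1"), LineagePair("rank2", "name2")),
    )

    # md5prefix8 picklists match the first 8 hex digits of a signature md5;
    # PickStyle.EXCLUDE inverts the selection.
    keep = SignaturePicklist("md5prefix8")
    keep.init(["09a08691"])
    db = db.select(picklist=keep)
    print(len(list(db.signatures())))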
- ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) - lca_db.insert(ss, ident='foo') + lca_db.insert(ss, ident="foo") - ident = 'foo' + ident = "foo" assert len(lca_db._ident_to_name) == 1 assert ident in lca_db._ident_to_name assert lca_db._ident_to_name[ident] == ss.name @@ -215,27 +203,25 @@ def test_api_create_insert_ident(): for vv in lca_db._hashval_to_idx.values(): set_of_values.update(vv) assert len(set_of_values) == 1 - assert set_of_values == { 0 } + assert set_of_values == {0} - assert not lca_db._idx_to_lid # no lineage added - assert not lca_db._lid_to_lineage # no lineage added + assert not lca_db._idx_to_lid # no lineage added + assert not lca_db._lid_to_lineage # no lineage added assert not lca_db._lineage_to_lid assert not lca_db._lid_to_idx def test_api_create_insert_two(): # check internal details if multiple signatures are inserted. - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) - ss2 = sourmash.load_one_signature(utils.get_test_data('63.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss2 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) - lca_db.insert(ss, ident='foo') - lca_db.insert(ss2, ident='bar') + lca_db.insert(ss, ident="foo") + lca_db.insert(ss2, ident="bar") - ident = 'foo' - ident2 = 'bar' + ident = "foo" + ident2 = "bar" assert len(lca_db._ident_to_name) == 2 assert ident in lca_db._ident_to_name assert ident2 in lca_db._ident_to_name @@ -258,22 +244,20 @@ def test_api_create_insert_two(): for vv in lca_db._hashval_to_idx.values(): set_of_values.update(vv) assert len(set_of_values) == 2 - assert set_of_values == { 0, 1 } + assert set_of_values == {0, 1} - assert not lca_db._idx_to_lid # no lineage added - assert not lca_db._lid_to_lineage # no lineage added + assert not lca_db._idx_to_lid # no lineage added + assert not lca_db._lid_to_lineage # no lineage added assert not lca_db._lineage_to_lid assert not lca_db._lid_to_idx def test_api_create_insert_w_lineage(): # test some internal implementation stuff - insert signature w/lineage - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) - lineage = ((LineagePair('rank1', 'name1'), - LineagePair('rank2', 'name2'))) + lineage = (LineagePair("rank1", "name1"), LineagePair("rank2", "name2")) lca_db.insert(ss, lineage=lineage) @@ -293,14 +277,14 @@ def test_api_create_insert_w_lineage(): for vv in lca_db._hashval_to_idx.values(): set_of_values.update(vv) assert len(set_of_values) == 1 - assert set_of_values == { 0 } + assert set_of_values == {0} # check lineage stuff assert len(lca_db._idx_to_lid) == 1 assert lca_db._idx_to_lid[0] == 0 assert len(lca_db._lid_to_lineage) == 1 assert lca_db._lid_to_lineage[0] == lineage - assert lca_db._lid_to_idx[0] == { 0 } + assert lca_db._lid_to_idx[0] == {0} assert len(lca_db._lineage_to_lid) == 1 assert lca_db._lineage_to_lid[lineage] == 0 @@ -308,12 +292,10 @@ def test_api_create_insert_w_lineage(): def test_api_create_insert_w_bad_lineage(): # test some internal implementation stuff - insert signature w/bad lineage - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), 
- ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) - lineage = ([LineagePair('rank1', 'name1'), - LineagePair('rank2', 'name2')],) + lineage = ([LineagePair("rank1", "name1"), LineagePair("rank2", "name2")],) with pytest.raises(ValueError): lca_db.insert(ss, lineage=lineage) @@ -321,11 +303,10 @@ def test_api_create_insert_w_bad_lineage(): def test_api_create_insert_w_bad_lineage_2(): # test some internal implementation stuff - insert signature w/bad lineage - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) - lineage = 1 # something non-iterable... + lineage = 1 # something non-iterable... with pytest.raises(ValueError): lca_db.insert(ss, lineage=lineage) @@ -333,8 +314,7 @@ def test_api_create_insert_w_bad_lineage_2(): def test_api_create_gather(): # create a database, and then run gather on it. - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss) @@ -348,10 +328,8 @@ def test_api_create_gather(): def test_api_add_genome_lineage(): # LCA_Databases can store/retrieve arbitrary lineages/taxonomies. - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) - lineage = ((LineagePair('rank1', 'name1'), - (LineagePair('rank2', 'name2')))) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + lineage = (LineagePair("rank1", "name1"), (LineagePair("rank2", "name2"))) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss, lineage=lineage) @@ -366,26 +344,24 @@ def test_api_add_genome_lineage(): def test_api_insert_update(): # check that cached parts of LCA_Database are updated when a new # signature is inserted. - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) - ss2 = sourmash.load_one_signature(utils.get_test_data('63.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss2 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss) - all_mh = [ x.minhash for x in lca_db.signatures() ] + all_mh = [x.minhash for x in lca_db.signatures()] assert ss.minhash in all_mh # see decorator @cached_property - assert hasattr(lca_db, '_cache') + assert hasattr(lca_db, "_cache") assert lca_db._cache # inserting a signature should delete the cache lca_db.insert(ss2) - assert not hasattr(lca_db, '_cache') + assert not hasattr(lca_db, "_cache") # check that it's rebuilt etc. etc. - all_mh = [ x.minhash for x in lca_db.signatures() ] + all_mh = [x.minhash for x in lca_db.signatures()] assert ss.minhash in all_mh assert ss2.minhash in all_mh @@ -393,8 +369,7 @@ def test_api_insert_update(): def test_api_insert_retrieve_check_name(): # check that signatures retrieved from LCA_Database objects have the # right name. 
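test_api_insert_update above pokes at sourmash's own cached_property decorator, which parks computed values in a `_cache` dict that insert() deletes. The same invalidation idea, modeled with the stdlib decorator (a simplified sketch, not sourmash's actual implementation):

    from functools import cached_property

    class TinyDB:
        def __init__(self):
            self._sigs = []

        @cached_property
        def signatures(self):
            # stands in for an expensive-to-build view of the database
            return tuple(self._sigs)

        def insert(self, sig):
            self._sigs.append(sig)
            # drop the cached value so the next access rebuilds it
            self.__dict__.pop("signatures", None)

    db = TinyDB()
    db.insert("47")
    assert db.signatures == ("47",)
    db.insert("63")  # invalidates the cache, like lca_db.insert() above
    assert db.signatures == ("47", "63")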
- ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss) @@ -408,10 +383,8 @@ def test_api_insert_retrieve_check_name(): def test_api_create_insert_two_then_scale(): # construct database, THEN downsample - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) - ss2 = sourmash.load_one_signature(utils.get_test_data('63.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss2 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss) @@ -431,10 +404,8 @@ def test_api_create_insert_two_then_scale(): def test_api_create_insert_two_then_scale_then_add(): # construct database, THEN downsample, then add another - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) - ss2 = sourmash.load_one_signature(utils.get_test_data('63.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss2 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss) @@ -460,10 +431,8 @@ def test_api_create_insert_two_then_scale_then_add(): def test_api_create_insert_scale_two(): # downsample while constructing database - ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'), - ksize=31) - ss2 = sourmash.load_one_signature(utils.get_test_data('63.fa.sig'), - ksize=31) + ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss2 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) # downsample to 5000 while inserting: lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=5000) @@ -483,7 +452,7 @@ def test_api_create_insert_scale_two(): def test_load_single_db(): - filename = utils.get_test_data('lca/delmont-1.lca.json') + filename = utils.get_test_data("lca/delmont-1.lca.json") db, ksize, scaled = lca_utils.load_single_database(filename) print(db) @@ -494,9 +463,9 @@ def test_load_single_db(): def test_load_single_db_empty(runtmp): # test load_single_database on an empty file; should raise ValueError - empty = runtmp.output('empty.lca.json') + empty = runtmp.output("empty.lca.json") - with open(empty, "wt") as fp: + with open(empty, "w"): pass with pytest.raises(ValueError) as exc: @@ -506,8 +475,8 @@ def test_load_single_db_empty(runtmp): def test_databases(): - filename1 = utils.get_test_data('lca/delmont-1.lca.json') - filename2 = utils.get_test_data('lca/delmont-2.lca.json') + filename1 = utils.get_test_data("lca/delmont-1.lca.json") + filename2 = utils.get_test_data("lca/delmont-2.lca.json") dblist, ksize, scaled = lca_utils.load_databases([filename1, filename2]) print(dblist) @@ -518,7 +487,7 @@ def test_databases(): def test_databases_load_fail_on_no_JSON(): - filename1 = utils.get_test_data('prot/protein.zip') + filename1 = utils.get_test_data("prot/protein.zip") with pytest.raises(ValueError) as exc: dblist, ksize, scaled = lca_utils.load_databases([filename1]) @@ -528,36 +497,37 @@ def test_databases_load_fail_on_no_JSON(): def test_databases_load_fail_on_dir(): - filename1 = utils.get_test_data('lca') + filename1 = utils.get_test_data("lca") with pytest.raises(ValueError) as exc: dblist, ksize, scaled = lca_utils.load_databases([filename1]) err = 
str(exc.value) print(err) assert f"'{filename1}' is not a file and cannot be loaded as an LCA database" in err - assert not 'found 0 matches total;' in err + assert "found 0 matches total;" not in err def test_databases_load_fail_on_not_exist(): - filename1 = utils.get_test_data('does-not-exist') + filename1 = utils.get_test_data("does-not-exist") with pytest.raises(ValueError) as exc: dblist, ksize, scaled = lca_utils.load_databases([filename1]) err = str(exc.value) print(err) assert f"'{filename1}' is not a file and cannot be loaded as an LCA database" in err - assert not 'found 0 matches total;' in err + assert "found 0 matches total;" not in err + def test_db_repr(): - filename = utils.get_test_data('lca/delmont-1.lca.json') + filename = utils.get_test_data("lca/delmont-1.lca.json") db, ksize, scaled = lca_utils.load_single_database(filename) - assert repr(db) == "LCA_Database('{}')".format(filename) + assert repr(db) == f"LCA_Database('{filename}')" def test_lca_index_signatures_method(): # test 'signatures' method from base class Index - filename = utils.get_test_data('lca/47+63.lca.json') + filename = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(filename) siglist = list(db.signatures()) @@ -567,13 +537,13 @@ def test_lca_index_signatures_method(): def test_lca_index_select(): # test 'select' method from Index base class. - filename = utils.get_test_data('lca/47+63.lca.json') + filename = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(filename) xx = db.select(ksize=31) assert xx == db - xx = db.select(moltype='DNA') + xx = db.select(moltype="DNA") assert xx == db xx = db.select(abund=False) @@ -583,7 +553,7 @@ def test_lca_index_select(): db.select(ksize=21) with pytest.raises(ValueError): - db.select(moltype='protein') + db.select(moltype="protein") with pytest.raises(ValueError): db.select(abund=True) @@ -592,12 +562,12 @@ def test_lca_index_select(): def test_lca_index_select_picklist(): # test 'select' method from Index base class with a picklist. - filename = utils.get_test_data('lca/47+63.lca.json') + filename = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(filename) # construct a picklist... - picklist = SignaturePicklist('md5prefix8') - picklist.init(['50a92740']) + picklist = SignaturePicklist("md5prefix8") + picklist.init(["50a92740"]) xx = db.select(picklist=picklist) assert xx == db @@ -605,7 +575,7 @@ def test_lca_index_select_picklist(): siglist = list(db.signatures()) assert len(siglist) == 1 ss = siglist[0] - assert ss.md5sum().startswith('50a92740') + assert ss.md5sum().startswith("50a92740") assert ss.minhash.ksize == 31 @@ -613,14 +583,14 @@ def test_lca_index_find_picklist_check_overlap(): # make sure 'find' works for picklists that exclude relevant signatures # (bug #1638) - query_fn = utils.get_test_data('47.fa.sig') + query_fn = utils.get_test_data("47.fa.sig") query_sig = sourmash.load_one_signature(query_fn, ksize=31) - db_fn = utils.get_test_data('lca/47+63.lca.json') + db_fn = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(db_fn) # construct a picklist... 
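The select() contract these tests pin down: compatible constraints hand the database back, incompatible ones raise ValueError. A sketch, assuming sourmash is installed; the .lca.json path is a placeholder for the 47+63 test database:

    from sourmash.lca import lca_utils

    db, ksize, scaled = lca_utils.load_single_database("47+63.lca.json")  # placeholder

    assert db.select(ksize=31) == db       # matching constraint -> same db back
    assert db.select(moltype="DNA") == db

    try:
        db.select(ksize=21)                # mismatch -> ValueError
    except ValueError:
        print("no ksize=21 sketches in this database")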
- picklist = SignaturePicklist('ident') - picklist.init(['NC_009665.1']) + picklist = SignaturePicklist("ident") + picklist.init(["NC_009665.1"]) xx = db.select(picklist=picklist) assert xx == db @@ -632,12 +602,12 @@ def test_lca_index_find_picklist_check_overlap(): def test_lca_index_select_picklist_exclude(): # test 'select' method from Index base class with a picklist. - filename = utils.get_test_data('lca/47+63.lca.json') + filename = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(filename) # construct a picklist... - picklist = SignaturePicklist('md5prefix8', pickstyle=PickStyle.EXCLUDE) - picklist.init(['50a92740']) + picklist = SignaturePicklist("md5prefix8", pickstyle=PickStyle.EXCLUDE) + picklist.init(["50a92740"]) xx = db.select(picklist=picklist) assert xx == db @@ -645,19 +615,19 @@ def test_lca_index_select_picklist_exclude(): siglist = list(db.signatures()) assert len(siglist) == 1 ss = siglist[0] - assert ss.md5sum().startswith('e88dc390') + assert ss.md5sum().startswith("e88dc390") assert ss.minhash.ksize == 31 def test_lca_index_select_picklist_twice(): # test 'select' method from Index base class with a picklist. - filename = utils.get_test_data('lca/47+63.lca.json') + filename = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(filename) # construct a picklist... - picklist = SignaturePicklist('md5prefix8') - picklist.init(['50a92740']) + picklist = SignaturePicklist("md5prefix8") + picklist.init(["50a92740"]) xx = db.select(picklist=picklist) assert xx == db @@ -668,13 +638,12 @@ def test_lca_index_select_picklist_twice(): assert "we do not (yet) support multiple picklists for LCA databases" in str(exc) - def test_search_db_scaled_gt_sig_scaled(): - dbfile = utils.get_test_data('lca/47+63.lca.json') + dbfile = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(dbfile) - sig = sourmash.load_one_signature(utils.get_test_data('47.fa.sig')) + sig = sourmash.load_one_signature(utils.get_test_data("47.fa.sig")) - results = db.search(sig, threshold=.01, ignore_abundance=True) + results = db.search(sig, threshold=0.01, ignore_abundance=True) match_sig = results[0][1] minhash = sig.minhash.downsample(scaled=10000) @@ -682,28 +651,28 @@ def test_search_db_scaled_gt_sig_scaled(): def test_search_db_scaled_lt_sig_scaled(): - dbfile = utils.get_test_data('lca/47+63.lca.json') + dbfile = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(dbfile) - sig = sourmash.load_one_signature(utils.get_test_data('47.fa.sig')) + sig = sourmash.load_one_signature(utils.get_test_data("47.fa.sig")) sig = sig.to_mutable() sig.minhash = sig.minhash.downsample(scaled=100000) - results = db.search(sig, threshold=.01, ignore_abundance=True) + results = db.search(sig, threshold=0.01, ignore_abundance=True) print(results) assert results[0].score == 1.0 match = results[0].signature - orig_sig = sourmash.load_one_signature(utils.get_test_data('47.fa.sig')) + orig_sig = sourmash.load_one_signature(utils.get_test_data("47.fa.sig")) assert orig_sig.minhash.jaccard(match.minhash, downsample=True) == 1.0 def test_gather_db_scaled_gt_sig_scaled(): - dbfile = utils.get_test_data('lca/47+63.lca.json') + dbfile = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(dbfile) - sig = sourmash.load_one_signature(utils.get_test_data('47.fa.sig')) + sig = 
sourmash.load_one_signature(utils.get_test_data("47.fa.sig")) - result = db.best_containment(sig, threshold=.01, ignore_abundance=True) + result = db.best_containment(sig, threshold=0.01, ignore_abundance=True) match_sig = result[1] minhash = sig.minhash.downsample(scaled=10000) @@ -711,12 +680,12 @@ def test_gather_db_scaled_gt_sig_scaled(): def test_gather_db_scaled_lt_sig_scaled(): - dbfile = utils.get_test_data('lca/47+63.lca.json') + dbfile = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(dbfile) - sig = sourmash.load_one_signature(utils.get_test_data('47.fa.sig')) + sig = sourmash.load_one_signature(utils.get_test_data("47.fa.sig")) sig_minhash = sig.minhash.downsample(scaled=100000) - result = db.best_containment(sig, threshold=.01, ignore_abundance=True) + result = db.best_containment(sig, threshold=0.01, ignore_abundance=True) match_sig = result[1] minhash = match_sig.minhash.downsample(scaled=100000) @@ -724,7 +693,7 @@ def test_gather_db_scaled_lt_sig_scaled(): def test_db_lineage_to_lid(): - dbfile = utils.get_test_data('lca/47+63.lca.json') + dbfile = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(dbfile) d = db._lineage_to_lid @@ -735,15 +704,15 @@ def test_db_lineage_to_lid(): print(items) lin1 = items[0][0][-1] - assert lin1.rank == 'strain' - assert lin1.name == 'Shewanella baltica OS185' + assert lin1.rank == "strain" + assert lin1.name == "Shewanella baltica OS185" lin1 = items[1][0][-1] - assert lin1.rank == 'strain' - assert lin1.name == 'Shewanella baltica OS223' + assert lin1.rank == "strain" + assert lin1.name == "Shewanella baltica OS223" def test_db_lid_to_idx(): - dbfile = utils.get_test_data('lca/47+63.lca.json') + dbfile = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(dbfile) d = db._lid_to_idx @@ -756,7 +725,7 @@ def test_db_lid_to_idx(): def test_db_idx_to_ident(): - dbfile = utils.get_test_data('lca/47+63.lca.json') + dbfile = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(dbfile) d = db._idx_to_ident @@ -765,23 +734,23 @@ def test_db_idx_to_ident(): assert len(items) == 2 print(items) - assert items == [(32, 'NC_009665'), (48, 'NC_011663')] + assert items == [(32, "NC_009665"), (48, "NC_011663")] ## command line tests def test_run_sourmash_lca(): - status, out, err = utils.runscript('sourmash', ['lca'], fail_ok=True) - assert status != 0 # no args provided, ok ;) + status, out, err = utils.runscript("sourmash", ["lca"], fail_ok=True) + assert status != 0 # no args provided, ok ;) def test_basic_index(runtmp, lca_db_format): - taxcsv = utils.get_test_data('lca/delmont-1.csv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}') + taxcsv = utils.get_test_data("lca/delmont-1.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}") - cmd = ['lca', 'index', taxcsv, 'delmont-1', input_sig, '-F', lca_db_format] + cmd = ["lca", "index", taxcsv, "delmont-1", input_sig, "-F", lca_db_format] runtmp.sourmash(*cmd) print(cmd) @@ -790,38 +759,50 @@ def test_basic_index(runtmp, lca_db_format): assert os.path.exists(lca_db), lca_db - assert 'Building LCA database with ksize=31 scaled=10000 moltype=DNA' in runtmp.last_result.err - assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err - assert "** assuming column 
'Domain' is superkingdom in spreadsheet" in runtmp.last_result.err - assert '1 identifiers used out of 1 distinct identifiers in spreadsheet.' in runtmp.last_result.err + assert ( + "Building LCA database with ksize=31 scaled=10000 moltype=DNA" + in runtmp.last_result.err + ) + assert ( + "** assuming column 'MAGs' is identifiers in spreadsheet" + in runtmp.last_result.err + ) + assert ( + "** assuming column 'Domain' is superkingdom in spreadsheet" + in runtmp.last_result.err + ) + assert ( + "1 identifiers used out of 1 distinct identifiers in spreadsheet." + in runtmp.last_result.err + ) def test_basic_index_twice(runtmp, lca_db_format): # run 'lca index' twice. - taxcsv = utils.get_test_data('lca/delmont-1.csv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}') + taxcsv = utils.get_test_data("lca/delmont-1.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + runtmp.output(f"delmont-1.lca.{lca_db_format}") - cmd = ['lca', 'index', taxcsv, 'delmont-1', input_sig, '-F', lca_db_format] + cmd = ["lca", "index", taxcsv, "delmont-1", input_sig, "-F", lca_db_format] runtmp.sourmash(*cmd) with pytest.raises(SourmashCommandFailed): - cmd = ['lca', 'index', taxcsv, 'delmont-1', input_sig, '-F', lca_db_format] + cmd = ["lca", "index", taxcsv, "delmont-1", input_sig, "-F", lca_db_format] runtmp.sourmash(*cmd) print(cmd) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'already exists. Not overwriting.' in runtmp.last_result.err + assert "already exists. Not overwriting." in runtmp.last_result.err def test_basic_index_bad_spreadsheet(runtmp, lca_db_format): - taxcsv = utils.get_test_data('lca/bad-spreadsheet.csv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}') + taxcsv = utils.get_test_data("lca/bad-spreadsheet.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}") - cmd = ['lca', 'index', taxcsv, lca_db, input_sig, '-F', lca_db_format] + cmd = ["lca", "index", taxcsv, lca_db, input_sig, "-F", lca_db_format] runtmp.sourmash(*cmd) print(cmd) @@ -830,79 +811,112 @@ def test_basic_index_bad_spreadsheet(runtmp, lca_db_format): assert os.path.exists(lca_db), lca_db - assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err - assert "** assuming column 'Domain' is superkingdom in spreadsheet" in runtmp.last_result.err - assert '1 identifiers used out of 1 distinct identifiers in spreadsheet.' in runtmp.last_result.err + assert ( + "** assuming column 'MAGs' is identifiers in spreadsheet" + in runtmp.last_result.err + ) + assert ( + "** assuming column 'Domain' is superkingdom in spreadsheet" + in runtmp.last_result.err + ) + assert ( + "1 identifiers used out of 1 distinct identifiers in spreadsheet." 
+ in runtmp.last_result.err + ) def test_basic_index_broken_spreadsheet(runtmp, lca_db_format): # duplicate identifiers in this spreadsheet - taxcsv = utils.get_test_data('lca/bad-spreadsheet-2.csv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}') + taxcsv = utils.get_test_data("lca/bad-spreadsheet-2.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}") - cmd = ['lca', 'index', taxcsv, lca_db, input_sig, '-F', lca_db_format] + cmd = ["lca", "index", taxcsv, lca_db, input_sig, "-F", lca_db_format] with pytest.raises(SourmashCommandFailed): runtmp.sourmash(*cmd) assert runtmp.last_result.status != 0 - assert "multiple lineages for identifier TARA_ASE_MAG_00031" in runtmp.last_result.err + assert ( + "multiple lineages for identifier TARA_ASE_MAG_00031" in runtmp.last_result.err + ) def test_basic_index_too_many_strains_too_few_species(runtmp, lca_db_format): # explicit test for #841, where 'n_species' wasn't getting counted # if lineage was at strain level resolution. - taxcsv = utils.get_test_data('lca/podar-lineage.csv') - input_sig = utils.get_test_data('47.fa.sig') - lca_db = runtmp.output(f'out.lca.{lca_db_format}') - - cmd = ['lca', 'index', taxcsv, lca_db, input_sig, - '-C', '3', '--split-identifiers', '-F', lca_db_format] + taxcsv = utils.get_test_data("lca/podar-lineage.csv") + input_sig = utils.get_test_data("47.fa.sig") + lca_db = runtmp.output(f"out.lca.{lca_db_format}") + + cmd = [ + "lca", + "index", + taxcsv, + lca_db, + input_sig, + "-C", + "3", + "--split-identifiers", + "-F", + lca_db_format, + ] runtmp.sourmash(*cmd) - assert not 'error: fewer than 20% of lineages' in runtmp.last_result.err + assert "error: fewer than 20% of lineages" not in runtmp.last_result.err assert runtmp.last_result.status == 0 def test_basic_index_too_few_species(runtmp, lca_db_format): # spreadsheets with too few species should be flagged, unless -f specified - taxcsv = utils.get_test_data('lca/tully-genome-sigs.classify.csv') + taxcsv = utils.get_test_data("lca/tully-genome-sigs.classify.csv") # (these don't really matter, should break on load spreadsheet) - input_sig = utils.get_test_data('47.fa.sig') - lca_db = runtmp.output(f'out.lca.{lca_db_format}') + input_sig = utils.get_test_data("47.fa.sig") + lca_db = runtmp.output(f"out.lca.{lca_db_format}") - cmd = ['lca', 'index', taxcsv, lca_db, input_sig, '-C', '3', - '-F', lca_db_format] + cmd = ["lca", "index", taxcsv, lca_db, input_sig, "-C", "3", "-F", lca_db_format] with pytest.raises(SourmashCommandFailed): runtmp.sourmash(*cmd) - assert not '"ERROR: fewer than 20% of lineages have species-level resolution' in runtmp.last_result.err + assert ( + '"ERROR: fewer than 20% of lineages have species-level resolution' + not in runtmp.last_result.err + ) assert runtmp.last_result.status != 0 def test_basic_index_require_taxonomy(runtmp, lca_db_format): # no taxonomy in here - taxcsv = utils.get_test_data('lca/bad-spreadsheet-3.csv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}') - - cmd = ['lca', 'index', '--require-taxonomy', taxcsv, lca_db, input_sig, - '-F', lca_db_format] + taxcsv = utils.get_test_data("lca/bad-spreadsheet-3.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}") + + cmd = [ + "lca", + "index", + "--require-taxonomy", + taxcsv, + 
lca_db, + input_sig, + "-F", + lca_db_format, + ] with pytest.raises(SourmashCommandFailed): runtmp.sourmash(*cmd) assert runtmp.last_result.status != 0 - assert "ERROR: no hash values found - are there any signatures?" in runtmp.last_result.err + assert ( + "ERROR: no hash values found - are there any signatures?" + in runtmp.last_result.err + ) def test_basic_index_column_start(runtmp, lca_db_format): - taxcsv = utils.get_test_data('lca/delmont-3.csv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}') + taxcsv = utils.get_test_data("lca/delmont-3.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}") - cmd = ['lca', 'index', '-C', '3', taxcsv, lca_db, input_sig, - '-F', lca_db_format] + cmd = ["lca", "index", "-C", "3", taxcsv, lca_db, input_sig, "-F", lca_db_format] runtmp.sourmash(*cmd) print(cmd) @@ -911,49 +925,71 @@ def test_basic_index_column_start(runtmp, lca_db_format): assert os.path.exists(lca_db) - assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err - assert "** assuming column 'Domain' is superkingdom in spreadsheet" in runtmp.last_result.err - assert '1 identifiers used out of 1 distinct identifiers in spreadsheet.' in runtmp.last_result.err + assert ( + "** assuming column 'MAGs' is identifiers in spreadsheet" + in runtmp.last_result.err + ) + assert ( + "** assuming column 'Domain' is superkingdom in spreadsheet" + in runtmp.last_result.err + ) + assert ( + "1 identifiers used out of 1 distinct identifiers in spreadsheet." + in runtmp.last_result.err + ) def test_index_empty_sketch_name(runtmp, lca_db_format): c = runtmp # create two signatures with empty 'name' attributes - cmd = ['sketch', 'dna', utils.get_test_data('genome-s12.fa.gz'), - utils.get_test_data('genome-s11.fa.gz')] + cmd = [ + "sketch", + "dna", + utils.get_test_data("genome-s12.fa.gz"), + utils.get_test_data("genome-s11.fa.gz"), + ] c.run_sourmash(*cmd) - sig1 = c.output('genome-s11.fa.gz.sig') + sig1 = c.output("genome-s11.fa.gz.sig") assert os.path.exists(sig1) - sig2 = c.output('genome-s12.fa.gz.sig') + sig2 = c.output("genome-s12.fa.gz.sig") assert os.path.exists(sig2) - outfile = f'zzz.lca.{lca_db_format}' + outfile = f"zzz.lca.{lca_db_format}" # can we insert them both? 
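Most of the churn in these CLI tests is ruff wrapping long lines: command lists are exploded one element per line, and long `assert ... in ...` expressions are parenthesized so the operator can move to its own line without backslash continuations. The transformation in isolation:

    err = "** assuming column 'MAGs' is identifiers in spreadsheet\n"

    # before: one long line
    assert "** assuming column 'MAGs' is identifiers in spreadsheet" in err

    # after: the same expression, wrapped in parentheses by the formatter
    assert (
        "** assuming column 'MAGs' is identifiers in spreadsheet"
        in err
    )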
- taxcsv = utils.get_test_data('lca/delmont-1.csv') - cmd = ['lca', 'index', taxcsv, outfile, sig1, sig2, '-F', lca_db_format] + taxcsv = utils.get_test_data("lca/delmont-1.csv") + cmd = ["lca", "index", taxcsv, outfile, sig1, sig2, "-F", lca_db_format] c.run_sourmash(*cmd) assert os.path.exists(c.output(outfile)) print(c.last_result.out) print(c.last_result.err) - assert 'WARNING: no lineage provided for 2 sig' in c.last_result.err + assert "WARNING: no lineage provided for 2 sig" in c.last_result.err def test_basic_index_and_classify_with_tsv_and_gz(runtmp, lca_db_format): - taxcsv = utils.get_test_data('lca/delmont-1.tsv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') + taxcsv = utils.get_test_data("lca/delmont-1.tsv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") - if lca_db_format == 'json': - lca_db = runtmp.output(f'delmont-1.lca.json.gz') + if lca_db_format == "json": + lca_db = runtmp.output("delmont-1.lca.json.gz") else: - lca_db = runtmp.output(f'delmont-1.lca.sql') - - cmd = ['lca', 'index', '--tabs', '--no-header', taxcsv, lca_db, input_sig, - '-F', lca_db_format] + lca_db = runtmp.output("delmont-1.lca.sql") + + cmd = [ + "lca", + "index", + "--tabs", + "--no-header", + taxcsv, + lca_db, + input_sig, + "-F", + lca_db_format, + ] runtmp.sourmash(*cmd) print(cmd) @@ -962,27 +998,36 @@ def test_basic_index_and_classify_with_tsv_and_gz(runtmp, lca_db_format): assert os.path.exists(lca_db) - assert '1 identifiers used out of 1 distinct identifiers in spreadsheet.' in runtmp.last_result.err + assert ( + "1 identifiers used out of 1 distinct identifiers in spreadsheet." + in runtmp.last_result.err + ) - cmd = ['lca', 'classify', '--db', lca_db, '--query', input_sig] + cmd = ["lca", "classify", "--db", lca_db, "--query", input_sig] runtmp.sourmash(*cmd) print(cmd) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'ID,status,superkingdom,phylum,class,order,family,genus,species' in runtmp.last_result.out - assert 'TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii' in runtmp.last_result.out - assert 'classified 1 signatures total' in runtmp.last_result.err - assert 'loaded 1 LCA databases' in runtmp.last_result.err + assert ( + "ID,status,superkingdom,phylum,class,order,family,genus,species" + in runtmp.last_result.out + ) + assert ( + "TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii" + in runtmp.last_result.out + ) + assert "classified 1 signatures total" in runtmp.last_result.err + assert "loaded 1 LCA databases" in runtmp.last_result.err def test_basic_index_and_classify(runtmp, lca_db_format): - taxcsv = utils.get_test_data('lca/delmont-1.csv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}') + taxcsv = utils.get_test_data("lca/delmont-1.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}") - cmd = ['lca', 'index', taxcsv, lca_db, input_sig, '-F', lca_db_format] + cmd = ["lca", "index", taxcsv, lca_db, input_sig, "-F", lca_db_format] runtmp.sourmash(*cmd) print(cmd) @@ -991,31 +1036,55 @@ def test_basic_index_and_classify(runtmp, lca_db_format): assert os.path.exists(lca_db) - assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err - assert "** assuming column 'Domain' is 
superkingdom in spreadsheet" in runtmp.last_result.err - assert '1 identifiers used out of 1 distinct identifiers in spreadsheet.' in runtmp.last_result.err - - cmd = ['lca', 'classify', '--db', lca_db, '--query', input_sig] + assert ( + "** assuming column 'MAGs' is identifiers in spreadsheet" + in runtmp.last_result.err + ) + assert ( + "** assuming column 'Domain' is superkingdom in spreadsheet" + in runtmp.last_result.err + ) + assert ( + "1 identifiers used out of 1 distinct identifiers in spreadsheet." + in runtmp.last_result.err + ) + + cmd = ["lca", "classify", "--db", lca_db, "--query", input_sig] runtmp.sourmash(*cmd) print(cmd) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'ID,status,superkingdom,phylum,class,order,family,genus,species' in runtmp.last_result.out - assert 'TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii' in runtmp.last_result.out - assert 'classified 1 signatures total' in runtmp.last_result.err - assert 'loaded 1 LCA databases' in runtmp.last_result.err + assert ( + "ID,status,superkingdom,phylum,class,order,family,genus,species" + in runtmp.last_result.out + ) + assert ( + "TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii" + in runtmp.last_result.out + ) + assert "classified 1 signatures total" in runtmp.last_result.err + assert "loaded 1 LCA databases" in runtmp.last_result.err def test_basic_index_and_classify_dup_lineage(runtmp, lca_db_format): - taxcsv = utils.get_test_data('lca/tara-delmont-SuppTable3.csv') - input_sig1 = utils.get_test_data('lca/TARA_ASE_MAG_00007.sig') - input_sig2 = utils.get_test_data('lca/TARA_ANW_MAG_00005.sig') - lca_db = runtmp.output(f'delmont-dup.lca.{lca_db_format}') - - cmd = ['lca', 'index', taxcsv, lca_db, input_sig1, input_sig2, - '-F', lca_db_format, '-f'] + taxcsv = utils.get_test_data("lca/tara-delmont-SuppTable3.csv") + input_sig1 = utils.get_test_data("lca/TARA_ASE_MAG_00007.sig") + input_sig2 = utils.get_test_data("lca/TARA_ANW_MAG_00005.sig") + lca_db = runtmp.output(f"delmont-dup.lca.{lca_db_format}") + + cmd = [ + "lca", + "index", + taxcsv, + lca_db, + input_sig1, + input_sig2, + "-F", + lca_db_format, + "-f", + ] runtmp.sourmash(*cmd) print(cmd) @@ -1024,35 +1093,41 @@ def test_basic_index_and_classify_dup_lineage(runtmp, lca_db_format): assert os.path.exists(lca_db) - cmd = ['lca', 'classify', '--db', lca_db, '--query', input_sig1] + cmd = ["lca", "classify", "--db", lca_db, "--query", input_sig1] runtmp.sourmash(*cmd) print(cmd) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'TARA_ASE_MAG_00007,found,Bacteria,Proteobacteria,Gammaproteobacteria,,,,,' in runtmp.last_result.out + assert ( + "TARA_ASE_MAG_00007,found,Bacteria,Proteobacteria,Gammaproteobacteria,,,,," + in runtmp.last_result.out + ) - cmd = ['lca', 'classify', '--db', lca_db, '--query', input_sig2] + cmd = ["lca", "classify", "--db", lca_db, "--query", input_sig2] runtmp.sourmash(*cmd) print(cmd) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'TARA_ANW_MAG_00005,found,Bacteria,Proteobacteria,Gammaproteobacteria,,,,,' in runtmp.last_result.out + assert ( + "TARA_ANW_MAG_00005,found,Bacteria,Proteobacteria,Gammaproteobacteria,,,,," + in runtmp.last_result.out + ) def test_index_traverse(runtmp, lca_db_format): - taxcsv = utils.get_test_data('lca/delmont-1.csv') - input_sig = 
utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}') + taxcsv = utils.get_test_data("lca/delmont-1.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}") - in_dir = runtmp.output('sigs') + in_dir = runtmp.output("sigs") os.mkdir(in_dir) - shutil.copyfile(input_sig, os.path.join(in_dir, 'q.sig')) + shutil.copyfile(input_sig, os.path.join(in_dir, "q.sig")) - cmd = ['lca', 'index', taxcsv, lca_db, in_dir, '-F', lca_db_format] + cmd = ["lca", "index", taxcsv, lca_db, in_dir, "-F", lca_db_format] runtmp.sourmash(*cmd) print(cmd) @@ -1061,26 +1136,35 @@ def test_index_traverse(runtmp, lca_db_format): assert os.path.exists(lca_db) - assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err - assert "** assuming column 'Domain' is superkingdom in spreadsheet" in runtmp.last_result.err - assert '1 identifiers used out of 1 distinct identifiers in spreadsheet.' in runtmp.last_result.err - assert 'WARNING: 1 duplicate signatures.' not in runtmp.last_result.err + assert ( + "** assuming column 'MAGs' is identifiers in spreadsheet" + in runtmp.last_result.err + ) + assert ( + "** assuming column 'Domain' is superkingdom in spreadsheet" + in runtmp.last_result.err + ) + assert ( + "1 identifiers used out of 1 distinct identifiers in spreadsheet." + in runtmp.last_result.err + ) + assert "WARNING: 1 duplicate signatures." not in runtmp.last_result.err def test_index_traverse_force(runtmp, lca_db_format): c = runtmp # test the use of --force to load all files, not just .sig - taxcsv = utils.get_test_data('lca/delmont-1.csv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = c.output(f'delmont-1.lca.{lca_db_format}') + taxcsv = utils.get_test_data("lca/delmont-1.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db = c.output(f"delmont-1.lca.{lca_db_format}") - in_dir = c.output('sigs') + in_dir = c.output("sigs") os.mkdir(in_dir) # name signature .txt instead of .sig: - shutil.copyfile(input_sig, os.path.join(in_dir, 'q.txt')) + shutil.copyfile(input_sig, os.path.join(in_dir, "q.txt")) # use --force - cmd = ['lca', 'index', taxcsv, lca_db, in_dir, '-f', '-F', lca_db_format] + cmd = ["lca", "index", taxcsv, lca_db, in_dir, "-f", "-F", lca_db_format] c.run_sourmash(*cmd) out = c.last_result.out @@ -1092,22 +1176,31 @@ def test_index_traverse_force(runtmp, lca_db_format): assert "** assuming column 'MAGs' is identifiers in spreadsheet" in err assert "** assuming column 'Domain' is superkingdom in spreadsheet" in err - assert '1 identifiers used out of 1 distinct identifiers in spreadsheet.' in err - assert 'WARNING: 1 duplicate signatures.' not in err + assert "1 identifiers used out of 1 distinct identifiers in spreadsheet." in err + assert "WARNING: 1 duplicate signatures." 
not in err def test_index_from_file_cmdline_sig(runtmp, lca_db_format): c = runtmp - taxcsv = utils.get_test_data('lca/delmont-1.csv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = c.output(f'delmont-1.lca.{lca_db_format}') + taxcsv = utils.get_test_data("lca/delmont-1.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db = c.output(f"delmont-1.lca.{lca_db_format}") - file_list = c.output('sigs.list') - with open(file_list, 'wt') as fp: + file_list = c.output("sigs.list") + with open(file_list, "w") as fp: print(input_sig, file=fp) - cmd = ['lca', 'index', taxcsv, lca_db, input_sig, '--from-file', file_list, - '-F', lca_db_format] + cmd = [ + "lca", + "index", + taxcsv, + lca_db, + input_sig, + "--from-file", + file_list, + "-F", + lca_db_format, + ] c.run_sourmash(*cmd) out = c.last_result.out @@ -1119,23 +1212,31 @@ def test_index_from_file_cmdline_sig(runtmp, lca_db_format): assert "** assuming column 'MAGs' is identifiers in spreadsheet" in err assert "** assuming column 'Domain' is superkingdom in spreadsheet" in err - assert '1 identifiers used out of 1 distinct identifiers in spreadsheet.' in err - assert 'WARNING: 1 duplicate signatures.' in err + assert "1 identifiers used out of 1 distinct identifiers in spreadsheet." in err + assert "WARNING: 1 duplicate signatures." in err def test_index_from_file(runtmp, lca_db_format): c = runtmp - taxcsv = utils.get_test_data('lca/delmont-1.csv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = c.output(f'delmont-1.lca.{lca_db_format}') + taxcsv = utils.get_test_data("lca/delmont-1.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db = c.output(f"delmont-1.lca.{lca_db_format}") - file_list = c.output('sigs.list') - with open(file_list, 'wt') as fp: + file_list = c.output("sigs.list") + with open(file_list, "w") as fp: print(input_sig, file=fp) - cmd = ['lca', 'index', taxcsv, lca_db, '--from-file', file_list, - '-F', lca_db_format] + cmd = [ + "lca", + "index", + taxcsv, + lca_db, + "--from-file", + file_list, + "-F", + lca_db_format, + ] c.run_sourmash(*cmd) out = c.last_result.out @@ -1147,33 +1248,41 @@ def test_index_from_file(runtmp, lca_db_format): assert "** assuming column 'MAGs' is identifiers in spreadsheet" in err assert "** assuming column 'Domain' is superkingdom in spreadsheet" in err - assert '1 identifiers used out of 1 distinct identifiers in spreadsheet.' in err + assert "1 identifiers used out of 1 distinct identifiers in spreadsheet." in err def test_index_fail_on_num(runtmp, lca_db_format): c = runtmp # lca index should yield a decent error message when attempted on 'num' - sigfile = utils.get_test_data('num/63.fa.sig') - taxcsv = utils.get_test_data('lca/podar-lineage.csv') + sigfile = utils.get_test_data("num/63.fa.sig") + taxcsv = utils.get_test_data("lca/podar-lineage.csv") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('lca', 'index', taxcsv, f'xxx.lca.{lca_db_format}', sigfile, - '-C', '3', '-F', lca_db_format) + c.run_sourmash( + "lca", + "index", + taxcsv, + f"xxx.lca.{lca_db_format}", + sigfile, + "-C", + "3", + "-F", + lca_db_format, + ) err = c.last_result.err print(err) - assert 'ERROR: cannot insert signature ' in err - assert 'ERROR: cannot downsample signature; is it a scaled signature?' in err + assert "ERROR: cannot insert signature " in err + assert "ERROR: cannot downsample signature; is it a scaled signature?" 
in err def test_index_traverse_real_spreadsheet_no_report(runtmp, lca_db_format): - taxcsv = utils.get_test_data('lca/tara-delmont-SuppTable3.csv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}') + taxcsv = utils.get_test_data("lca/tara-delmont-SuppTable3.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}") - cmd = ['lca', 'index', taxcsv, lca_db, input_sig, '-f', - '-F', lca_db_format] + cmd = ["lca", "index", taxcsv, lca_db, input_sig, "-f", "-F", lca_db_format] runtmp.sourmash(*cmd) print(cmd) @@ -1182,22 +1291,44 @@ def test_index_traverse_real_spreadsheet_no_report(runtmp, lca_db_format): assert os.path.exists(lca_db) - assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err - assert "** assuming column 'Domain' is superkingdom in spreadsheet" in runtmp.last_result.err - assert '1 identifiers used out of 957 distinct identifiers in spreadsheet.' in runtmp.last_result.err - assert 'WARNING: no signatures for 956 spreadsheet rows.' in runtmp.last_result.err - assert 'WARNING: 105 unused lineages.' in runtmp.last_result.err - assert '(You can use --report to generate a detailed report.)' in runtmp.last_result.err + assert ( + "** assuming column 'MAGs' is identifiers in spreadsheet" + in runtmp.last_result.err + ) + assert ( + "** assuming column 'Domain' is superkingdom in spreadsheet" + in runtmp.last_result.err + ) + assert ( + "1 identifiers used out of 957 distinct identifiers in spreadsheet." + in runtmp.last_result.err + ) + assert "WARNING: no signatures for 956 spreadsheet rows." in runtmp.last_result.err + assert "WARNING: 105 unused lineages." in runtmp.last_result.err + assert ( + "(You can use --report to generate a detailed report.)" + in runtmp.last_result.err + ) def test_index_traverse_real_spreadsheet_report(runtmp, lca_db_format): - taxcsv = utils.get_test_data('lca/tara-delmont-SuppTable3.csv') - input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}') - report_loc = runtmp.output('report.txt') - - cmd = ['lca', 'index', taxcsv, lca_db, input_sig, '--report', - report_loc, '-f', '-F', lca_db_format] + taxcsv = utils.get_test_data("lca/tara-delmont-SuppTable3.csv") + input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}") + report_loc = runtmp.output("report.txt") + + cmd = [ + "lca", + "index", + taxcsv, + lca_db, + input_sig, + "--report", + report_loc, + "-f", + "-F", + lca_db_format, + ] runtmp.sourmash(*cmd) print(cmd) @@ -1206,148 +1337,191 @@ def test_index_traverse_real_spreadsheet_report(runtmp, lca_db_format): assert os.path.exists(lca_db) - assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err - assert "** assuming column 'Domain' is superkingdom in spreadsheet" in runtmp.last_result.err - assert '1 identifiers used out of 957 distinct identifiers in spreadsheet.' in runtmp.last_result.err - assert 'WARNING: no signatures for 956 spreadsheet rows.' in runtmp.last_result.err - assert 'WARNING: 105 unused lineages.' 
in runtmp.last_result.err
-    assert '(You can use --report to generate a detailed report.)' not in runtmp.last_result.err
+    assert (
+        "** assuming column 'MAGs' is identifiers in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "** assuming column 'Domain' is superkingdom in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "1 identifiers used out of 957 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
+    assert "WARNING: no signatures for 956 spreadsheet rows." in runtmp.last_result.err
+    assert "WARNING: 105 unused lineages." in runtmp.last_result.err
+    assert (
+        "(You can use --report to generate a detailed report.)"
+        not in runtmp.last_result.err
+    )
 
     assert os.path.exists(report_loc)
 
 
 def test_single_classify(runtmp):
     # run a basic 'classify', check output.
-    db1 = utils.get_test_data('lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
 
-    cmd = ['lca', 'classify', '--db', db1, '--query', input_sig]
+    cmd = ["lca", "classify", "--db", db1, "--query", input_sig]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii' in runtmp.last_result.out
-    assert 'classified 1 signatures total' in runtmp.last_result.err
-    assert 'loaded 1 LCA databases' in runtmp.last_result.err
+    assert (
+        "TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii"
+        in runtmp.last_result.out
+    )
+    assert "classified 1 signatures total" in runtmp.last_result.err
+    assert "loaded 1 LCA databases" in runtmp.last_result.err
 
 
 def test_single_classify_zip_query(runtmp):
     # run 'classify' with a query in a zipfile
-    db1 = utils.get_test_data('lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
 
     query_ss = sourmash.load_one_signature(input_sig, ksize=31)
-    query_zipfile = runtmp.output('query.zip')
+    query_zipfile = runtmp.output("query.zip")
     with sourmash_args.SaveSignaturesToLocation(query_zipfile) as save_sig:
         save_sig.add(query_ss)
 
-    cmd = ['lca', 'classify', '--db', db1, '--query', query_zipfile]
+    cmd = ["lca", "classify", "--db", db1, "--query", query_zipfile]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii' in runtmp.last_result.out
-    assert 'classified 1 signatures total' in runtmp.last_result.err
-    assert 'loaded 1 LCA databases' in runtmp.last_result.err
+    assert (
+        "TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii"
+        in runtmp.last_result.out
+    )
+    assert "classified 1 signatures total" in runtmp.last_result.err
+    assert "loaded 1 LCA databases" in runtmp.last_result.err
 
 
 def test_single_classify_to_output(runtmp):
-    db1 = utils.get_test_data(f'lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-
-    cmd = ['lca', 'classify', '--db', db1, '--query', input_sig,
-           '-o', runtmp.output('outfile.txt')]
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+
+    cmd = [
+        "lca",
+        "classify",
+        "--db",
+        db1,
+        "--query",
+        input_sig,
+        "-o",
+        runtmp.output("outfile.txt"),
+    ]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    with open(runtmp.output('outfile.txt'), 'rt') as fp:
+    with open(runtmp.output("outfile.txt")) as fp:
         outdata = fp.read()
 
-    assert 'TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii' in outdata
-    assert 'classified 1 signatures total' in runtmp.last_result.err
-    assert 'loaded 1 LCA databases' in runtmp.last_result.err
+    assert (
+        "TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii"
+        in outdata
+    )
+    assert "classified 1 signatures total" in runtmp.last_result.err
+    assert "loaded 1 LCA databases" in runtmp.last_result.err
 
 
 def test_single_classify_to_output_no_name(runtmp):
-    db1 = utils.get_test_data(f'lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
 
     ss = sourmash.load_one_signature(input_sig, ksize=31)
 
-    outsig_filename = runtmp.output('q.sig')
-    with open(outsig_filename, 'wt') as fp:
+    outsig_filename = runtmp.output("q.sig")
+    with open(outsig_filename, "w") as fp:
         # remove name from signature here --
-        new_sig = sourmash.SourmashSignature(ss.minhash, filename='xyz')
+        new_sig = sourmash.SourmashSignature(ss.minhash, filename="xyz")
         sourmash.save_signatures([new_sig], fp)
 
-    cmd = ['lca', 'classify', '--db', db1, '--query', outsig_filename,
-           '-o', runtmp.output('outfile.txt')]
+    cmd = [
+        "lca",
+        "classify",
+        "--db",
+        db1,
+        "--query",
+        outsig_filename,
+        "-o",
+        runtmp.output("outfile.txt"),
+    ]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    with open(runtmp.output('outfile.txt'), 'rt') as fp:
+    with open(runtmp.output("outfile.txt")) as fp:
         outdata = fp.read()
 
     print((outdata,))
-    assert 'xyz,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii' in outdata
-    assert 'classified 1 signatures total' in runtmp.last_result.err
-    assert 'loaded 1 LCA databases' in runtmp.last_result.err
+    assert (
+        "xyz,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii"
+        in outdata
+    )
+    assert "classified 1 signatures total" in runtmp.last_result.err
+    assert "loaded 1 LCA databases" in runtmp.last_result.err
 
 
 def test_single_classify_empty(runtmp):
-    db1 = utils.get_test_data(f'lca/both.lca.json')
-    input_sig = utils.get_test_data('GCF_000005845.2_ASM584v2_genomic.fna.gz.sig')
+    db1 = utils.get_test_data("lca/both.lca.json")
+    input_sig = utils.get_test_data("GCF_000005845.2_ASM584v2_genomic.fna.gz.sig")
 
-    cmd = ['lca', 'classify', '--db', db1, '--query', input_sig]
+    cmd = ["lca", "classify", "--db", db1, "--query", input_sig]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'GCF_000005845,nomatch,,,,,,,,' in runtmp.last_result.out
-    assert 'classified 1 signatures total' in runtmp.last_result.err
-    assert 'loaded 1 LCA databases' in runtmp.last_result.err
+    assert "GCF_000005845,nomatch,,,,,,,," in runtmp.last_result.out
+    assert "classified 1 signatures total" in runtmp.last_result.err
+    assert "loaded 1 LCA databases" in runtmp.last_result.err
 
 
 def test_single_classify_traverse(runtmp):
-    db1 = utils.get_test_data(f'lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    in_dir = runtmp.output('sigs')
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    in_dir = runtmp.output("sigs")
     os.mkdir(in_dir)
-    shutil.copyfile(input_sig, os.path.join(in_dir, 'q.sig'))
+    shutil.copyfile(input_sig, os.path.join(in_dir, "q.sig"))
 
-    cmd = ['lca', 'classify', '--db', db1, '--query', input_sig]
+    cmd = ["lca", "classify", "--db", db1, "--query", input_sig]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii' in runtmp.last_result.out
-    assert 'classified 1 signatures total' in runtmp.last_result.err
-    assert 'loaded 1 LCA databases' in runtmp.last_result.err
+    assert (
+        "TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii"
+        in runtmp.last_result.out
+    )
+    assert "classified 1 signatures total" in runtmp.last_result.err
+    assert "loaded 1 LCA databases" in runtmp.last_result.err
 
 
 def test_multi_query_classify_traverse(runtmp):
     # both.lca.json is built from both dir and dir2
-    db1 = utils.get_test_data(f'lca/both.lca.json')
-    dir1 = utils.get_test_data('lca/dir1')
-    dir2 = utils.get_test_data('lca/dir2')
+    db1 = utils.get_test_data("lca/both.lca.json")
+    dir1 = utils.get_test_data("lca/dir1")
+    dir2 = utils.get_test_data("lca/dir2")
 
-    cmd = ['lca', 'classify', '--db', db1, '--query', dir1, dir2]
+    cmd = ["lca", "classify", "--db", db1, "--query", dir1, dir2]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    with open(utils.get_test_data('lca/classify-by-both.csv')) as fp:
+    with open(utils.get_test_data("lca/classify-by-both.csv")) as fp:
         fp_lines = fp.readlines()
         out_lines = runtmp.last_result.out.splitlines()
@@ -1362,22 +1536,22 @@ def test_multi_query_classify_traverse(runtmp):
 @utils.in_tempdir
 def test_multi_query_classify_query_from_file(c):
     # both.lca.json is built from both dir and dir2
-    db1 = utils.get_test_data('lca/both.lca.json')
-    dir1_glob = utils.get_test_data('lca/dir1/*.sig')
+    db1 = utils.get_test_data("lca/both.lca.json")
+    dir1_glob = utils.get_test_data("lca/dir1/*.sig")
     dir1_files = glob.glob(dir1_glob)
-    dir2_glob = utils.get_test_data('lca/dir2/*.sig')
+    dir2_glob = utils.get_test_data("lca/dir2/*.sig")
     dir2_files = glob.glob(dir2_glob)
 
-    file_list = c.output('file.list')
-    with open(file_list, 'wt') as fp:
+    file_list = c.output("file.list")
+    with open(file_list, "w") as fp:
         print("\n".join(dir1_files), file=fp)
         print("\n".join(dir2_files), file=fp)
 
-    cmd = ['lca', 'classify', '--db', db1, '--query-from-file', file_list]
+    cmd = ["lca", "classify", "--db", db1, "--query-from-file", file_list]
     c.run_sourmash(*cmd)
     out = c.last_result.out
 
-    with open(utils.get_test_data('lca/classify-by-both.csv')) as fp:
+    with open(utils.get_test_data("lca/classify-by-both.csv")) as fp:
         fp_lines = fp.readlines()
         out_lines = out.splitlines()
@@ -1392,23 +1566,31 @@ def test_multi_query_classify_query_from_file(c):
 @utils.in_tempdir
 def test_multi_query_classify_query_from_file_and_query(c):
     # both.lca.json is built from both dir and dir2
-    db1 = utils.get_test_data(f'lca/both.lca.json')
-    dir1_glob = utils.get_test_data('lca/dir1/*.sig')
+    db1 = utils.get_test_data("lca/both.lca.json")
+    dir1_glob = utils.get_test_data("lca/dir1/*.sig")
     dir1_files = glob.glob(dir1_glob)
-    dir2_glob = utils.get_test_data('lca/dir2/*.sig')
+    dir2_glob = utils.get_test_data("lca/dir2/*.sig")
     dir2_files = glob.glob(dir2_glob)
 
-    file_list = c.output('file.list')
-    with open(file_list, 'wt') as fp:
-        print("\n".join(dir1_files[1:]), file=fp) # leave off first one
+    file_list = c.output("file.list")
+    with open(file_list, "w") as fp:
+        print("\n".join(dir1_files[1:]), file=fp)  # leave off first one
         print("\n".join(dir2_files), file=fp)
 
-    cmd = ['lca', 'classify', '--db', db1, '--query', dir1_files[0],
-           '--query-from-file', file_list]
+    cmd = [
+        "lca",
+        "classify",
+        "--db",
+        db1,
+        "--query",
+        dir1_files[0],
+        "--query-from-file",
+        file_list,
+    ]
     c.run_sourmash(*cmd)
     out = c.last_result.out
 
-    with open(utils.get_test_data('lca/classify-by-both.csv'), 'rt') as fp:
+    with open(utils.get_test_data("lca/classify-by-both.csv")) as fp:
         fp_lines = fp.readlines()
         out_lines = out.splitlines()
@@ -1422,19 +1604,19 @@ def test_multi_query_classify_query_from_file_and_query(c):
 def test_multi_db_multi_query_classify_traverse(runtmp):
     # two halves of both.lca.json, see above test.
-    db1 = utils.get_test_data(f'lca/dir1.lca.json')
-    db2 = utils.get_test_data(f'lca/dir2.lca.json')
-    dir1 = utils.get_test_data('lca/dir1')
-    dir2 = utils.get_test_data('lca/dir2')
+    db1 = utils.get_test_data("lca/dir1.lca.json")
+    db2 = utils.get_test_data("lca/dir2.lca.json")
+    dir1 = utils.get_test_data("lca/dir1")
+    dir2 = utils.get_test_data("lca/dir2")
 
-    cmd = ['lca', 'classify', '--db', db1, db2, '--query', dir1, dir2]
+    cmd = ["lca", "classify", "--db", db1, db2, "--query", dir1, dir2]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    with open(utils.get_test_data('lca/classify-by-both.csv'), 'rt') as fp:
+    with open(utils.get_test_data("lca/classify-by-both.csv")) as fp:
         fp_lines = fp.readlines()
         out_lines = runtmp.last_result.out.splitlines()
@@ -1447,11 +1629,11 @@ def test_multi_db_multi_query_classify_traverse(runtmp):
 def test_unassigned_internal_index_and_classify(runtmp, lca_db_format):
-    taxcsv = utils.get_test_data('lca/delmont-4.csv')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca/delmont-4.csv")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}")
 
-    cmd = ['lca', 'index', taxcsv, lca_db, input_sig, '-F', lca_db_format]
+    cmd = ["lca", "index", taxcsv, lca_db, input_sig, "-F", lca_db_format]
     runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -1460,29 +1642,44 @@ def test_unassigned_internal_index_and_classify(runtmp, lca_db_format):
 
     assert os.path.exists(lca_db)
 
-    assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err
-    assert "** assuming column 'Domain' is superkingdom in spreadsheet" in runtmp.last_result.err
-    assert '1 identifiers used out of 1 distinct identifiers in spreadsheet.' in runtmp.last_result.err
-
-    cmd = ['lca', 'classify', '--db', lca_db, '--query', input_sig]
+    assert (
+        "** assuming column 'MAGs' is identifiers in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "** assuming column 'Domain' is superkingdom in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "1 identifiers used out of 1 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
+
+    cmd = ["lca", "classify", "--db", lca_db, "--query", input_sig]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'ID,status,superkingdom,phylum,class,order,family,genus,species' in runtmp.last_result.out
-    assert 'TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,unassigned,Alteromonadaceae,unassigned,Alteromonas_macleodii' in runtmp.last_result.out
-    assert 'classified 1 signatures total' in runtmp.last_result.err
-    assert 'loaded 1 LCA databases' in runtmp.last_result.err
+    assert (
+        "ID,status,superkingdom,phylum,class,order,family,genus,species"
+        in runtmp.last_result.out
+    )
+    assert (
+        "TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,unassigned,Alteromonadaceae,unassigned,Alteromonas_macleodii"
+        in runtmp.last_result.out
+    )
+    assert "classified 1 signatures total" in runtmp.last_result.err
+    assert "loaded 1 LCA databases" in runtmp.last_result.err
 
 
 def test_unassigned_last_index_and_classify(runtmp, lca_db_format):
-    taxcsv = utils.get_test_data('lca/delmont-5.csv')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca/delmont-5.csv")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}")
 
-    cmd = ['lca', 'index', taxcsv, lca_db, input_sig, '-F', lca_db_format]
+    cmd = ["lca", "index", taxcsv, lca_db, input_sig, "-F", lca_db_format]
     runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -1491,31 +1688,45 @@ def test_unassigned_last_index_and_classify(runtmp, lca_db_format):
 
     assert os.path.exists(lca_db)
 
-    assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err
-    assert "** assuming column 'Domain' is superkingdom in spreadsheet" in runtmp.last_result.err
-    assert '1 identifiers used out of 1 distinct identifiers in spreadsheet.' in runtmp.last_result.err
-
-    cmd = ['lca', 'classify', '--db', lca_db, '--query', input_sig]
+    assert (
+        "** assuming column 'MAGs' is identifiers in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "** assuming column 'Domain' is superkingdom in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "1 identifiers used out of 1 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
+
+    cmd = ["lca", "classify", "--db", lca_db, "--query", input_sig]
    runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'ID,status,superkingdom,phylum,class,order,family,genus,species' in runtmp.last_result.out
-    assert 'TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,,,\r\n' in runtmp.last_result.out
-    assert 'classified 1 signatures total' in runtmp.last_result.err
-    assert 'loaded 1 LCA databases' in runtmp.last_result.err
+    assert (
+        "ID,status,superkingdom,phylum,class,order,family,genus,species"
+        in runtmp.last_result.out
+    )
+    assert (
+        "TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,,,\r\n"
+        in runtmp.last_result.out
+    )
+    assert "classified 1 signatures total" in runtmp.last_result.err
+    assert "loaded 1 LCA databases" in runtmp.last_result.err
 
 
 def test_index_and_classify_internal_unassigned_multi(runtmp, lca_db_format):
-    taxcsv = utils.get_test_data('lca/delmont-6.csv')
-    input_sig1 = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    input_sig2 = utils.get_test_data('lca/TARA_PSW_MAG_00136.sig')
-    lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca/delmont-6.csv")
+    input_sig1 = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    input_sig2 = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig")
+    lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}")
 
-    cmd = ['lca', 'index', taxcsv, lca_db, input_sig1, input_sig2,
-           '-F', lca_db_format]
+    cmd = ["lca", "index", taxcsv, lca_db, input_sig1, input_sig2, "-F", lca_db_format]
     runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -1524,35 +1735,56 @@ def test_index_and_classify_internal_unassigned_multi(runtmp, lca_db_format):
 
     assert os.path.exists(lca_db)
 
-    assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err
-    assert "** assuming column 'Domain' is superkingdom in spreadsheet" in runtmp.last_result.err
-    assert '2 identifiers used out of 2 distinct identifiers in spreadsheet.' in runtmp.last_result.err
+    assert (
+        "** assuming column 'MAGs' is identifiers in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "** assuming column 'Domain' is superkingdom in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "2 identifiers used out of 2 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
 
     # classify input_sig1
-    cmd = ['lca', 'classify', '--db', lca_db, '--query', input_sig1]
+    cmd = ["lca", "classify", "--db", lca_db, "--query", input_sig1]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'ID,status,superkingdom,phylum,class,order,family,genus,species' in runtmp.last_result.out
-    assert 'TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,unassigned,unassigned,Alteromonadaceae,,,\r\n' in runtmp.last_result.out
-    assert 'classified 1 signatures total' in runtmp.last_result.err
-    assert 'loaded 1 LCA databases' in runtmp.last_result.err
+    assert (
+        "ID,status,superkingdom,phylum,class,order,family,genus,species"
+        in runtmp.last_result.out
+    )
+    assert (
+        "TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,unassigned,unassigned,Alteromonadaceae,,,\r\n"
+        in runtmp.last_result.out
+    )
+    assert "classified 1 signatures total" in runtmp.last_result.err
+    assert "loaded 1 LCA databases" in runtmp.last_result.err
 
     # classify input_sig2
-    cmd = ['lca', 'classify', '--db', lca_db, '--query', input_sig2]
+    cmd = ["lca", "classify", "--db", lca_db, "--query", input_sig2]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'ID,status,superkingdom,phylum,class,order,family,genus,species' in runtmp.last_result.out
-    assert 'TARA_PSW_MAG_00136,found,Eukaryota,Chlorophyta,Prasinophyceae,unassigned,unassigned,Ostreococcus,,\r\n' in runtmp.last_result.out
-    assert 'classified 1 signatures total' in runtmp.last_result.err
-    assert 'loaded 1 LCA databases' in runtmp.last_result.err
+    assert (
+        "ID,status,superkingdom,phylum,class,order,family,genus,species"
+        in runtmp.last_result.out
+    )
+    assert (
+        "TARA_PSW_MAG_00136,found,Eukaryota,Chlorophyta,Prasinophyceae,unassigned,unassigned,Ostreococcus,,\r\n"
+        in runtmp.last_result.out
+    )
+    assert "classified 1 signatures total" in runtmp.last_result.err
+    assert "loaded 1 LCA databases" in runtmp.last_result.err
 
 
 def test_classify_majority_vote_1(runtmp, lca_db_format):
@@ -1560,13 +1792,14 @@ def test_classify_majority_vote_1(runtmp, lca_db_format):
     c = runtmp
 
     # build database
-    taxcsv = utils.get_test_data('lca/delmont-6.csv')
-    input_sig1 = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    input_sig2 = utils.get_test_data('lca/TARA_PSW_MAG_00136.sig')
-    lca_db = c.output(f'delmont-1.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca/delmont-6.csv")
+    input_sig1 = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    input_sig2 = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig")
+    lca_db = c.output(f"delmont-1.lca.{lca_db_format}")
 
-    c.run_sourmash('lca', 'index', taxcsv, lca_db, input_sig1, input_sig2,
-                   '-F', lca_db_format)
+    c.run_sourmash(
+        "lca", "index", taxcsv, lca_db, input_sig1, input_sig2, "-F", lca_db_format
+    )
 
     print(c.last_command)
     print(c.last_result.out)
@@ -1574,26 +1807,46 @@
 
     assert os.path.exists(lca_db)
 
-    assert "** assuming column 'MAGs' is identifiers in spreadsheet" in c.last_result.err
-    assert "** assuming column 'Domain' is superkingdom in spreadsheet" in c.last_result.err
-    assert '2 identifiers used out of 2 distinct identifiers in spreadsheet.' in c.last_result.err
+    assert (
+        "** assuming column 'MAGs' is identifiers in spreadsheet" in c.last_result.err
+    )
+    assert (
+        "** assuming column 'Domain' is superkingdom in spreadsheet"
+        in c.last_result.err
+    )
+    assert (
+        "2 identifiers used out of 2 distinct identifiers in spreadsheet."
+        in c.last_result.err
+    )
 
     # merge input_sig1 and input_sig2
-    c.run_sourmash('signature', 'merge', input_sig1, input_sig2, '-k', '31', '--flatten', '-o', 'sig1and2.sig')
-    sig1and2 = c.output('sig1and2.sig')
+    c.run_sourmash(
+        "signature",
+        "merge",
+        input_sig1,
+        input_sig2,
+        "-k",
+        "31",
+        "--flatten",
+        "-o",
+        "sig1and2.sig",
+    )
+    sig1and2 = c.output("sig1and2.sig")
 
     # lca classify should yield no results
-    c.run_sourmash('lca', 'classify', '--db', lca_db, '--query', sig1and2)
+    c.run_sourmash("lca", "classify", "--db", lca_db, "--query", sig1and2)
 
     print(c.last_command)
     print(c.last_result.out)
     print(c.last_result.err)
 
-    assert 'ID,status,superkingdom,phylum,class,order,family,genus,species' in c.last_result.out
-    assert 'disagree,,,,,,,,' in c.last_result.out
-    assert 'classified 1 signatures total' in c.last_result.err
-    assert 'loaded 1 LCA databases' in c.last_result.err
-
+    assert (
+        "ID,status,superkingdom,phylum,class,order,family,genus,species"
+        in c.last_result.out
+    )
+    assert "disagree,,,,,,,," in c.last_result.out
+    assert "classified 1 signatures total" in c.last_result.err
+    assert "loaded 1 LCA databases" in c.last_result.err
 
 
 def test_classify_majority_vote_2(runtmp, lca_db_format):
@@ -1603,13 +1856,14 @@
     c = runtmp
 
     # build database
-    taxcsv = utils.get_test_data('lca/delmont-6.csv')
-    input_sig1 = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    input_sig2 = utils.get_test_data('lca/TARA_PSW_MAG_00136.sig')
-    lca_db = c.output(f'delmont-1.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca/delmont-6.csv")
+    input_sig1 = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    input_sig2 = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig")
+    lca_db = c.output(f"delmont-1.lca.{lca_db_format}")
 
-    c.run_sourmash('lca', 'index', taxcsv, lca_db, input_sig1, input_sig2,
-                   '-F', lca_db_format)
+    c.run_sourmash(
+        "lca", "index", taxcsv, lca_db, input_sig1, input_sig2, "-F", lca_db_format
+    )
 
     print(c.last_command)
     print(c.last_result.out)
@@ -1617,25 +1871,49 @@
 
     assert os.path.exists(lca_db)
 
-    assert "** assuming column 'MAGs' is identifiers in spreadsheet" in c.last_result.err
-    assert "** assuming column 'Domain' is superkingdom in spreadsheet" in c.last_result.err
-    assert '2 identifiers used out of 2 distinct identifiers in spreadsheet.' in c.last_result.err
+    assert (
+        "** assuming column 'MAGs' is identifiers in spreadsheet" in c.last_result.err
+    )
+    assert (
+        "** assuming column 'Domain' is superkingdom in spreadsheet"
+        in c.last_result.err
+    )
+    assert (
+        "2 identifiers used out of 2 distinct identifiers in spreadsheet."
+        in c.last_result.err
+    )
 
     # merge input_sig1 and input_sig2
-    c.run_sourmash('signature', 'merge', input_sig1, input_sig2, '-k', '31', '--flatten', '-o', 'sig1and2.sig')
-    sig1and2 = c.output('sig1and2.sig')
+    c.run_sourmash(
+        "signature",
+        "merge",
+        input_sig1,
+        input_sig2,
+        "-k",
+        "31",
+        "--flatten",
+        "-o",
+        "sig1and2.sig",
+    )
+    sig1and2 = c.output("sig1and2.sig")
 
     # majority vote classify
-    c.run_sourmash('lca', 'classify', '--db', lca_db, '--query', sig1and2, '--majority')
+    c.run_sourmash("lca", "classify", "--db", lca_db, "--query", sig1and2, "--majority")
 
     print(c.last_command)
     print(c.last_result.out)
    print(c.last_result.err)
 
-    assert 'ID,status,superkingdom,phylum,class,order,family,genus,species' in c.last_result.out
-    assert 'found,Eukaryota,Chlorophyta,Prasinophyceae,unassigned,unassigned,Ostreococcus' in c.last_result.out
-    assert 'classified 1 signatures total' in c.last_result.err
-    assert 'loaded 1 LCA databases' in c.last_result.err
+    assert (
+        "ID,status,superkingdom,phylum,class,order,family,genus,species"
+        in c.last_result.out
+    )
+    assert (
+        "found,Eukaryota,Chlorophyta,Prasinophyceae,unassigned,unassigned,Ostreococcus"
+        in c.last_result.out
+    )
+    assert "classified 1 signatures total" in c.last_result.err
+    assert "loaded 1 LCA databases" in c.last_result.err
 
 
 def test_classify_majority_vote_3(runtmp, lca_db_format):
@@ -1643,13 +1921,14 @@
     c = runtmp
 
     # build database
-    taxcsv = utils.get_test_data('lca/delmont-6.csv')
-    input_sig1 = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    input_sig2 = utils.get_test_data('lca/TARA_PSW_MAG_00136.sig')
-    lca_db = c.output(f'delmont-1.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca/delmont-6.csv")
+    input_sig1 = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    input_sig2 = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig")
+    lca_db = c.output(f"delmont-1.lca.{lca_db_format}")
 
-    c.run_sourmash('lca', 'index', taxcsv, lca_db, input_sig1, input_sig2,
-                   '-F', lca_db_format)
+    c.run_sourmash(
+        "lca", "index", taxcsv, lca_db, input_sig1, input_sig2, "-F", lca_db_format
+    )
 
     print(c.last_command)
     print(c.last_result.out)
@@ -1657,51 +1936,70 @@
 
     assert os.path.exists(lca_db)
 
-    assert "** assuming column 'MAGs' is identifiers in spreadsheet" in c.last_result.err
-    assert "** assuming column 'Domain' is superkingdom in spreadsheet" in c.last_result.err
-    assert '2 identifiers used out of 2 distinct identifiers in spreadsheet.' in c.last_result.err
+    assert (
+        "** assuming column 'MAGs' is identifiers in spreadsheet" in c.last_result.err
+    )
+    assert (
+        "** assuming column 'Domain' is superkingdom in spreadsheet"
+        in c.last_result.err
+    )
+    assert (
+        "2 identifiers used out of 2 distinct identifiers in spreadsheet."
+        in c.last_result.err
+    )
 
     # obtain testdata '47.fa.sig'
-    testdata1 = utils.get_test_data('47.fa.sig')
+    testdata1 = utils.get_test_data("47.fa.sig")
 
     # majority vote classify
-    c.run_sourmash('lca', 'classify', '--db', lca_db, '--query', testdata1, '--majority')
+    c.run_sourmash(
+        "lca", "classify", "--db", lca_db, "--query", testdata1, "--majority"
+    )
 
     print(c.last_command)
     print(c.last_result.out)
     print(c.last_result.err)
 
-    assert 'ID,status,superkingdom,phylum,class,order,family,genus,species' in c.last_result.out
-    assert 'nomatch,,,,,,,,' in c.last_result.out
-    assert 'classified 1 signatures total' in c.last_result.err
-    assert 'loaded 1 LCA databases' in c.last_result.err
+    assert (
+        "ID,status,superkingdom,phylum,class,order,family,genus,species"
+        in c.last_result.out
+    )
+    assert "nomatch,,,,,,,," in c.last_result.out
+    assert "classified 1 signatures total" in c.last_result.err
+    assert "loaded 1 LCA databases" in c.last_result.err
 
 
 def test_multi_db_classify(runtmp):
-    db1 = utils.get_test_data(f'lca/delmont-1.lca.json')
-    db2 = utils.get_test_data('lca/delmont-2.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    db2 = utils.get_test_data("lca/delmont-2.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
 
-    cmd = ['lca', 'classify', '--db', db1, db2, '--query', input_sig]
+    cmd = ["lca", "classify", "--db", db1, db2, "--query", input_sig]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'ID,status,superkingdom,phylum,class,order,family,genus,species' in runtmp.last_result.out
-    assert 'TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,,,,' in runtmp.last_result.out
-    assert 'classified 1 signatures total' in runtmp.last_result.err
-    assert 'loaded 2 LCA databases' in runtmp.last_result.err
+    assert (
+        "ID,status,superkingdom,phylum,class,order,family,genus,species"
+        in runtmp.last_result.out
+    )
+    assert (
+        "TARA_ASE_MAG_00031,found,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,,,,"
+        in runtmp.last_result.out
+    )
+    assert "classified 1 signatures total" in runtmp.last_result.err
+    assert "loaded 2 LCA databases" in runtmp.last_result.err
 
 
 def test_classify_unknown_hashes(runtmp, lca_db_format):
-    taxcsv = utils.get_test_data('lca-root/tax.csv')
-    input_sig1 = utils.get_test_data('lca-root/TARA_MED_MAG_00029.fa.sig')
-    input_sig2 = utils.get_test_data('lca-root/TOBG_MED-875.fna.gz.sig')
-    lca_db = runtmp.output(f'lca-root.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca-root/tax.csv")
+    input_sig1 = utils.get_test_data("lca-root/TARA_MED_MAG_00029.fa.sig")
+    input_sig2 = utils.get_test_data("lca-root/TOBG_MED-875.fna.gz.sig")
+    lca_db = runtmp.output(f"lca-root.lca.{lca_db_format}")
 
-    cmd = ['lca', 'index', taxcsv, lca_db, input_sig2, '-F', lca_db_format]
+    cmd = ["lca", "index", taxcsv, lca_db, input_sig2, "-F", lca_db_format]
     runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -1710,59 +2008,81 @@
 
     assert os.path.exists(lca_db)
 
-    assert '1 identifiers used out of 2 distinct identifiers in spreadsheet.' in runtmp.last_result.err
+    assert (
+        "1 identifiers used out of 2 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
 
-    cmd = ['lca', 'classify', '--db', lca_db, '--query', input_sig1]
+    cmd = ["lca", "classify", "--db", lca_db, "--query", input_sig1]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert '(root)' not in runtmp.last_result.out
-    assert 'TARA_MED_MAG_00029,found,Archaea,Euryarcheoata,unassigned,unassigned,novelFamily_I' in runtmp.last_result.out
+    assert "(root)" not in runtmp.last_result.out
+    assert (
+        "TARA_MED_MAG_00029,found,Archaea,Euryarcheoata,unassigned,unassigned,novelFamily_I"
+        in runtmp.last_result.out
+    )
 
 
 def test_single_summarize(runtmp):
-    db1 = utils.get_test_data('lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
 
-    cmd = ['lca', 'summarize', '--db', db1, '--query', input_sig]
+    cmd = ["lca", "summarize", "--db", db1, "--query", input_sig]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'loaded 1 signatures from 1 files total.' in runtmp.last_result.err
-    assert '100.0% 200 Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales' in runtmp.last_result.out
+    assert "loaded 1 signatures from 1 files total." in runtmp.last_result.err
+    assert (
+        "100.0% 200 Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales"
+        in runtmp.last_result.out
+    )
 
 
 def test_single_summarize_singleton(runtmp):
-    db1 = utils.get_test_data('lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-
-    cmd = ['lca', 'summarize', '--db', db1, '--query', input_sig,]
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+
+    cmd = [
+        "lca",
+        "summarize",
+        "--db",
+        db1,
+        "--query",
+        input_sig,
+    ]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'loaded 1 signatures from 1 files total.' in runtmp.last_result.err
-    assert '100.0% 200 Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales' in runtmp.last_result.out
-    assert 'test-data/lca/TARA_ASE_MAG_00031.sig:5b438c6c TARA_ASE_MAG_00031' in runtmp.last_result.out
+    assert "loaded 1 signatures from 1 files total." in runtmp.last_result.err
+    assert (
+        "100.0% 200 Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales"
+        in runtmp.last_result.out
+    )
+    assert (
+        "test-data/lca/TARA_ASE_MAG_00031.sig:5b438c6c TARA_ASE_MAG_00031"
+        in runtmp.last_result.out
+    )
 
 
 @utils.in_tempdir
 def test_single_summarize_traverse(c):
-    db1 = utils.get_test_data('lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    in_dir = c.output('sigs')
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    in_dir = c.output("sigs")
     os.mkdir(in_dir)
-    shutil.copyfile(input_sig, os.path.join(in_dir, 'q.sig'))
+    shutil.copyfile(input_sig, os.path.join(in_dir, "q.sig"))
 
-    cmd = ['lca', 'summarize', '--db', db1, '--query', in_dir]
+    cmd = ["lca", "summarize", "--db", db1, "--query", in_dir]
     c.run_sourmash(*cmd)
 
     out = c.last_result.out
@@ -1770,18 +2090,22 @@
     err = c.last_result.err
     print(err)
 
-    assert 'loaded 1 signatures from 1 files total.' in err
-    assert '100.0% 200 Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales' in out
+    assert "loaded 1 signatures from 1 files total." in err
+    assert (
+        "100.0% 200 Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales"
+        in out
+    )
+
 
 @utils.in_tempdir
 def test_single_summarize_singleton_traverse(c):
-    db1 = utils.get_test_data('lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    in_dir = c.output('sigs')
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    in_dir = c.output("sigs")
    os.mkdir(in_dir)
-    shutil.copyfile(input_sig, os.path.join(in_dir, 'q.sig'))
+    shutil.copyfile(input_sig, os.path.join(in_dir, "q.sig"))
 
-    cmd = ['lca', 'summarize', '--db', db1, '--query', in_dir]
+    cmd = ["lca", "summarize", "--db", db1, "--query", in_dir]
     c.run_sourmash(*cmd)
 
     out = c.last_result.out
@@ -1789,63 +2113,89 @@
     err = c.last_result.err
     print(err)
 
-    assert 'loaded 1 signatures from 1 files total.' in err
-    assert '100.0% 200 Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales' in out
-    assert 'q.sig:5b438c6c TARA_ASE_MAG_00031' in out
+    assert "loaded 1 signatures from 1 files total." in err
+    assert (
+        "100.0% 200 Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales"
+        in out
+    )
+    assert "q.sig:5b438c6c TARA_ASE_MAG_00031" in out
 
 
 def test_single_summarize_to_output(runtmp):
-    db1 = utils.get_test_data('lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    in_dir = runtmp.output('sigs')
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    in_dir = runtmp.output("sigs")
     os.mkdir(in_dir)
-    shutil.copyfile(input_sig, os.path.join(in_dir, 'q.sig'))
-
-    cmd = ['lca', 'summarize', '--db', db1, '--query', input_sig,
-           '-o', runtmp.output('output.txt')]
+    shutil.copyfile(input_sig, os.path.join(in_dir, "q.sig"))
+
+    cmd = [
+        "lca",
+        "summarize",
+        "--db",
+        db1,
+        "--query",
+        input_sig,
+        "-o",
+        runtmp.output("output.txt"),
+    ]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    with open(runtmp.output('output.txt'), 'rt') as fp:
+    with open(runtmp.output("output.txt")) as fp:
        outdata = fp.read()
 
-    assert 'loaded 1 signatures from 1 files total.' in runtmp.last_result.err
-    assert '200,Bacteria,Proteobacteria,Gammaproteobacteria' in outdata
-
+    assert "loaded 1 signatures from 1 files total." in runtmp.last_result.err
+    assert "200,Bacteria,Proteobacteria,Gammaproteobacteria" in outdata
 
 
 def test_single_summarize_to_output_check_filename(runtmp):
-    db1 = utils.get_test_data('lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    in_dir = runtmp.output('sigs')
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    in_dir = runtmp.output("sigs")
     os.mkdir(in_dir)
-    shutil.copyfile(input_sig, os.path.join(in_dir, 'q.sig'))
-
-    cmd = ['lca', 'summarize', '--db', db1, '--query', os.path.join(in_dir, 'q.sig'),
-           '-o', runtmp.output('output.txt')]
+    shutil.copyfile(input_sig, os.path.join(in_dir, "q.sig"))
+
+    cmd = [
+        "lca",
+        "summarize",
+        "--db",
+        db1,
+        "--query",
+        os.path.join(in_dir, "q.sig"),
+        "-o",
+        runtmp.output("output.txt"),
+    ]
    runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    outdata = Path(runtmp.output('output.txt')).read_text()
-
-    assert 'loaded 1 signatures from 1 files total.' in runtmp.last_result.err
-    assert 'count,superkingdom,phylum,class,order,family,genus,species,strain,filename,sig_name,sig_md5,total_counts\n' in outdata
-    assert '200,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii,,'+os.path.join(in_dir, 'q.sig')+',TARA_ASE_MAG_00031,5b438c6c858cdaf9e9b05a207fa3f9f0,200.0\n' in outdata
+    outdata = Path(runtmp.output("output.txt")).read_text()
+
+    assert "loaded 1 signatures from 1 files total." in runtmp.last_result.err
+    assert (
+        "count,superkingdom,phylum,class,order,family,genus,species,strain,filename,sig_name,sig_md5,total_counts\n"
+        in outdata
+    )
+    assert (
+        "200,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Alteromonadaceae,Alteromonas,Alteromonas_macleodii,,"
+        + os.path.join(in_dir, "q.sig")
+        + ",TARA_ASE_MAG_00031,5b438c6c858cdaf9e9b05a207fa3f9f0,200.0\n"
        in outdata
+    )
     print(outdata)
 
 
 def test_summarize_unknown_hashes_to_output_check_total_counts(runtmp, lca_db_format):
-    taxcsv = utils.get_test_data('lca-root/tax.csv')
-    input_sig1 = utils.get_test_data('lca-root/TARA_MED_MAG_00029.fa.sig')
-    input_sig2 = utils.get_test_data('lca-root/TOBG_MED-875.fna.gz.sig')
-    lca_db = runtmp.output(f'lca-root.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca-root/tax.csv")
+    input_sig1 = utils.get_test_data("lca-root/TARA_MED_MAG_00029.fa.sig")
+    input_sig2 = utils.get_test_data("lca-root/TOBG_MED-875.fna.gz.sig")
+    lca_db = runtmp.output(f"lca-root.lca.{lca_db_format}")
 
-    cmd = ['lca', 'index', taxcsv, lca_db, input_sig2, '-F', lca_db_format]
+    cmd = ["lca", "index", taxcsv, lca_db, input_sig2, "-F", lca_db_format]
     runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -1854,78 +2204,89 @@ def test_summarize_unknown_hashes_to_output_check_total_counts(runtmp, lca_db_fo
 
     assert os.path.exists(lca_db)
 
-    assert '1 identifiers used out of 2 distinct identifiers in spreadsheet.' in runtmp.last_result.err
+    assert (
+        "1 identifiers used out of 2 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
 
-    cmd = ['lca', 'summarize', '--db', lca_db, '--query', input_sig1,
-           '-o', 'out.csv']
+    cmd = ["lca", "summarize", "--db", lca_db, "--query", input_sig1, "-o", "out.csv"]
    runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert '(root)' not in runtmp.last_result.out
-    assert '11.5% 27 Archaea;Euryarcheoata;unassigned;unassigned;novelFamily_I' in runtmp.last_result.out
+    assert "(root)" not in runtmp.last_result.out
+    assert (
+        "11.5% 27 Archaea;Euryarcheoata;unassigned;unassigned;novelFamily_I"
+        in runtmp.last_result.out
+    )
 
-    with open(runtmp.output('out.csv'), newline="") as fp:
+    with open(runtmp.output("out.csv"), newline="") as fp:
         r = csv.DictReader(fp)
         rows = list(r)
 
-        pairs = [ (row['count'], row['total_counts']) for row in rows ]
-        pairs = [ (float(x), float(y)) for x, y in pairs ]
+        pairs = [(row["count"], row["total_counts"]) for row in rows]
+        pairs = [(float(x), float(y)) for x, y in pairs]
         pairs = set(pairs)
 
-        assert pairs == { (27.0, 234.0) }
+        assert pairs == {(27.0, 234.0)}
 
 
 def test_single_summarize_scaled(runtmp):
-    db1 = utils.get_test_data('lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    in_dir = runtmp.output('sigs')
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    in_dir = runtmp.output("sigs")
     os.mkdir(in_dir)
-    shutil.copyfile(input_sig, os.path.join(in_dir, 'q.sig'))
+    shutil.copyfile(input_sig, os.path.join(in_dir, "q.sig"))
 
-    cmd = ['lca', 'summarize', '--db', db1, '--query', input_sig,
-           '--scaled', '100000']
+    cmd = ["lca", "summarize", "--db", db1, "--query", input_sig, "--scaled", "100000"]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'loaded 1 signatures from 1 files total.' in runtmp.last_result.err
-    assert '100.0% 27 Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales'
+    assert "loaded 1 signatures from 1 files total." in runtmp.last_result.err
+    assert "100.0% 27 Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales"
 
 
 def test_single_summarize_scaled_zip_query(runtmp):
     # check zipfile as query
-    db1 = utils.get_test_data('lca/delmont-1.lca.json')
-    input_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
+    db1 = utils.get_test_data("lca/delmont-1.lca.json")
+    input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
 
     query_ss = sourmash.load_one_signature(input_sig, ksize=31)
-    query_zipfile = runtmp.output('query.zip')
+    query_zipfile = runtmp.output("query.zip")
     with sourmash_args.SaveSignaturesToLocation(query_zipfile) as save_sig:
         save_sig.add(query_ss)
 
-    cmd = ['lca', 'summarize', '--db', db1, '--query', query_zipfile,
-           '--scaled', '100000']
+    cmd = [
+        "lca",
+        "summarize",
+        "--db",
+        db1,
+        "--query",
+        query_zipfile,
+        "--scaled",
+        "100000",
+    ]
     runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'loaded 1 signatures from 1 files total.' in runtmp.last_result.err
-    assert '100.0% 27 Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales'
+    assert "loaded 1 signatures from 1 files total." in runtmp.last_result.err
+    assert "100.0% 27 Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales"
 
 
 def test_multi_summarize_with_unassigned_singleton(runtmp, lca_db_format):
-    taxcsv = utils.get_test_data('lca/delmont-6.csv')
-    input_sig1 = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    input_sig2 = utils.get_test_data('lca/TARA_PSW_MAG_00136.sig')
-    lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca/delmont-6.csv")
+    input_sig1 = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    input_sig2 = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig")
+    lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}")
 
-    cmd = ['lca', 'index', taxcsv, lca_db, input_sig1, input_sig2,
-           '-F', lca_db_format]
+    cmd = ["lca", "index", taxcsv, lca_db, input_sig1, input_sig2, "-F", lca_db_format]
     runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -1934,21 +2295,39 @@ def test_multi_summarize_with_unassigned_singleton(runtmp, lca_db_format):
 
     assert os.path.exists(lca_db)
 
-    assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err
-    assert "** assuming column 'Domain' is superkingdom in spreadsheet" in runtmp.last_result.err
-    assert '2 identifiers used out of 2 distinct identifiers in spreadsheet.' in runtmp.last_result.err
-
-    cmd = ['lca', 'summarize', '--db', lca_db, '--query', input_sig1,
-           input_sig2, '--ignore-abundance']
+    assert (
+        "** assuming column 'MAGs' is identifiers in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "** assuming column 'Domain' is superkingdom in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "2 identifiers used out of 2 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
+
+    cmd = [
+        "lca",
+        "summarize",
+        "--db",
+        lca_db,
+        "--query",
+        input_sig1,
+        input_sig2,
+        "--ignore-abundance",
+    ]
    runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'loaded 2 signatures from 2 files total.' in runtmp.last_result.err
+    assert "loaded 2 signatures from 2 files total." in runtmp.last_result.err
 
     out_lines = runtmp.last_result.out.splitlines()
+
     def remove_line_startswith(x, check=None):
         for line in out_lines:
             if line.startswith(x):
@@ -1957,32 +2336,45 @@ def remove_line_startswith(x, check=None):
                     # make sure the check value is in there
                     assert check in line
                 return line
-        assert 0, "couldn't find {}".format(x)
+        assert 0, f"couldn't find {x}"
 
     # note, proportions/percentages are now per-file
-    remove_line_startswith('100.0% 200 Bacteria ', 'TARA_ASE_MAG_00031.sig:5b438c6c')
-    remove_line_startswith('100.0% 200 Bacteria;Proteobacteria;unassigned;unassigned ')
-    remove_line_startswith('100.0% 1231 Eukaryota;Chlorophyta ')
-    remove_line_startswith('100.0% 1231 Eukaryota ', 'TARA_PSW_MAG_00136.sig:db50b713')
-    remove_line_startswith('100.0% 200 Bacteria;Proteobacteria ')
-    remove_line_startswith('100.0% 200 Bacteria;Proteobacteria;unassigned ')
-    remove_line_startswith('100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae ')
-    remove_line_startswith('100.0% 200 Bacteria;Proteobacteria;unassigned;unassigned;Alteromonadaceae ')
-    remove_line_startswith('100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae;unassigned;unassigned ')
-    remove_line_startswith('100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae;unassigned ')
-    remove_line_startswith('100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae;unassigned;unassigned;Ostreococcus ')
+    remove_line_startswith(
+        "100.0% 200 Bacteria ", "TARA_ASE_MAG_00031.sig:5b438c6c"
+    )
+    remove_line_startswith(
+        "100.0% 200 Bacteria;Proteobacteria;unassigned;unassigned "
+    )
+    remove_line_startswith("100.0% 1231 Eukaryota;Chlorophyta ")
+    remove_line_startswith(
+        "100.0% 1231 Eukaryota ", "TARA_PSW_MAG_00136.sig:db50b713"
+    )
+    remove_line_startswith("100.0% 200 Bacteria;Proteobacteria ")
+    remove_line_startswith("100.0% 200 Bacteria;Proteobacteria;unassigned ")
+    remove_line_startswith("100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae ")
+    remove_line_startswith(
+        "100.0% 200 Bacteria;Proteobacteria;unassigned;unassigned;Alteromonadaceae "
+    )
+    remove_line_startswith(
+        "100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae;unassigned;unassigned "
+    )
+    remove_line_startswith(
+        "100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae;unassigned "
+    )
+    remove_line_startswith(
+        "100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae;unassigned;unassigned;Ostreococcus "
+    )
     assert not out_lines
 
 
 def test_multi_summarize_with_zip_unassigned_singleton(runtmp, lca_db_format):
     # test summarize on multiple queries, in a zipfile.
-    taxcsv = utils.get_test_data('lca/delmont-6.csv')
-    input_sig1 = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
-    input_sig2 = utils.get_test_data('lca/TARA_PSW_MAG_00136.sig')
-    lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca/delmont-6.csv")
+    input_sig1 = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
+    input_sig2 = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig")
+    lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}")
 
-    cmd = ['lca', 'index', taxcsv, lca_db, input_sig1, input_sig2,
-           '-F', lca_db_format]
+    cmd = ["lca", "index", taxcsv, lca_db, input_sig1, input_sig2, "-F", lca_db_format]
    runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -1991,31 +2383,48 @@ def test_multi_summarize_with_zip_unassigned_singleton(runtmp, lca_db_format):
 
     assert os.path.exists(lca_db)
 
-    assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err
-    assert "** assuming column 'Domain' is superkingdom in spreadsheet" in runtmp.last_result.err
-    assert '2 identifiers used out of 2 distinct identifiers in spreadsheet.' in runtmp.last_result.err
-
-    query_zipfile = runtmp.output('query.zip')
+    assert (
+        "** assuming column 'MAGs' is identifiers in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "** assuming column 'Domain' is superkingdom in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "2 identifiers used out of 2 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
+
+    query_zipfile = runtmp.output("query.zip")
     with sourmash_args.SaveSignaturesToLocation(query_zipfile) as save_sig:
-        input_sig1 = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
+        input_sig1 = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
         sig1 = sourmash.load_one_signature(input_sig1, ksize=31)
-        input_sig2 = utils.get_test_data('lca/TARA_PSW_MAG_00136.sig')
+        input_sig2 = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig")
         sig2 = sourmash.load_one_signature(input_sig2, ksize=31)
 
         save_sig.add(sig1)
         save_sig.add(sig2)
 
-    cmd = ['lca', 'summarize', '--db', lca_db, '--query', 'query.zip',
-           '--ignore-abundance']
+    cmd = [
+        "lca",
+        "summarize",
+        "--db",
+        lca_db,
+        "--query",
+        "query.zip",
+        "--ignore-abundance",
+    ]
    runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert 'loaded 2 signatures from 1 files total.' in runtmp.last_result.err
+    assert "loaded 2 signatures from 1 files total." in runtmp.last_result.err
 
     out_lines = runtmp.last_result.out.splitlines()
+
     def remove_line_startswith(x, check=None):
         for line in out_lines:
             if line.startswith(x):
@@ -2024,31 +2433,40 @@ def remove_line_startswith(x, check=None):
                     # make sure the check value is in there
                     assert check in line
                 return line
-        assert 0, "couldn't find {}".format(x)
+        assert 0, f"couldn't find {x}"

     # note, proportions/percentages are now per-file
-    remove_line_startswith('100.0% 200 Bacteria ', ':5b438c6c')
-    remove_line_startswith('100.0% 200 Bacteria;Proteobacteria;unassigned;unassigned ')
-    remove_line_startswith('100.0% 1231 Eukaryota;Chlorophyta ')
-    remove_line_startswith('100.0% 1231 Eukaryota ', ':db50b713')
-    remove_line_startswith('100.0% 200 Bacteria;Proteobacteria ')
-    remove_line_startswith('100.0% 200 Bacteria;Proteobacteria;unassigned ')
-    remove_line_startswith('100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae ')
-    remove_line_startswith('100.0% 200 Bacteria;Proteobacteria;unassigned;unassigned;Alteromonadaceae ')
-    remove_line_startswith('100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae;unassigned;unassigned ')
-    remove_line_startswith('100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae;unassigned ')
-    remove_line_startswith('100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae;unassigned;unassigned;Ostreococcus ')
+    remove_line_startswith("100.0% 200 Bacteria ", ":5b438c6c")
+    remove_line_startswith(
+        "100.0% 200 Bacteria;Proteobacteria;unassigned;unassigned "
+    )
+    remove_line_startswith("100.0% 1231 Eukaryota;Chlorophyta ")
+    remove_line_startswith("100.0% 1231 Eukaryota ", ":db50b713")
+    remove_line_startswith("100.0% 200 Bacteria;Proteobacteria ")
+    remove_line_startswith("100.0% 200 Bacteria;Proteobacteria;unassigned ")
+    remove_line_startswith("100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae ")
+    remove_line_startswith(
+        "100.0% 200 Bacteria;Proteobacteria;unassigned;unassigned;Alteromonadaceae "
+    )
+    remove_line_startswith(
+        "100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae;unassigned;unassigned "
+    )
+    remove_line_startswith(
+        "100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae;unassigned "
+    )
+    remove_line_startswith(
+        "100.0% 1231 Eukaryota;Chlorophyta;Prasinophyceae;unassigned;unassigned;Ostreococcus "
+    )
    assert not out_lines
 
 
 def test_summarize_to_root(runtmp, lca_db_format):
-    taxcsv = utils.get_test_data('lca-root/tax.csv')
-    input_sig1 = utils.get_test_data('lca-root/TARA_MED_MAG_00029.fa.sig')
-    input_sig2 = utils.get_test_data('lca-root/TOBG_MED-875.fna.gz.sig')
-    lca_db = runtmp.output(f'lca-root.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca-root/tax.csv")
+    input_sig1 = utils.get_test_data("lca-root/TARA_MED_MAG_00029.fa.sig")
+    input_sig2 = utils.get_test_data("lca-root/TOBG_MED-875.fna.gz.sig")
+    lca_db = runtmp.output(f"lca-root.lca.{lca_db_format}")
 
-    cmd = ['lca', 'index', taxcsv, lca_db, input_sig1, input_sig2,
-           '-F', lca_db_format]
+    cmd = ["lca", "index", taxcsv, lca_db, input_sig1, input_sig2, "-F", lca_db_format]
    runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -2057,27 +2475,37 @@
 
     assert os.path.exists(lca_db)
 
-    assert '2 identifiers used out of 2 distinct identifiers in spreadsheet.' in runtmp.last_result.err
-
-    cmd = ['lca', 'summarize', '--db', lca_db, '--query', input_sig2,
-           '--ignore-abundance']
+    assert (
+        "2 identifiers used out of 2 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
+
+    cmd = [
+        "lca",
+        "summarize",
+        "--db",
+        lca_db,
+        "--query",
+        input_sig2,
+        "--ignore-abundance",
+    ]
    runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert '78.6% 99 Archaea' in runtmp.last_result.out
-    assert '21.4% 27 (root)' in runtmp.last_result.out
+    assert "78.6% 99 Archaea" in runtmp.last_result.out
+    assert "21.4% 27 (root)" in runtmp.last_result.out
 
 
 def test_summarize_unknown_hashes(runtmp, lca_db_format):
-    taxcsv = utils.get_test_data('lca-root/tax.csv')
-    input_sig1 = utils.get_test_data('lca-root/TARA_MED_MAG_00029.fa.sig')
-    input_sig2 = utils.get_test_data('lca-root/TOBG_MED-875.fna.gz.sig')
-    lca_db = runtmp.output(f'lca-root.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca-root/tax.csv")
+    input_sig1 = utils.get_test_data("lca-root/TARA_MED_MAG_00029.fa.sig")
+    input_sig2 = utils.get_test_data("lca-root/TOBG_MED-875.fna.gz.sig")
+    lca_db = runtmp.output(f"lca-root.lca.{lca_db_format}")
 
-    cmd = ['lca', 'index', taxcsv, lca_db, input_sig2, '-F', lca_db_format]
+    cmd = ["lca", "index", taxcsv, lca_db, input_sig2, "-F", lca_db_format]
    runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -2086,27 +2514,32 @@
 
     assert os.path.exists(lca_db)
 
-    assert '1 identifiers used out of 2 distinct identifiers in spreadsheet.' in runtmp.last_result.err
+    assert (
+        "1 identifiers used out of 2 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
 
-    cmd = ['lca', 'summarize', '--db', lca_db, '--query', input_sig1]
+    cmd = ["lca", "summarize", "--db", lca_db, "--query", input_sig1]
    runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert '(root)' not in runtmp.last_result.out
-    assert '11.5% 27 Archaea;Euryarcheoata;unassigned;unassigned;novelFamily_I' in runtmp.last_result.out
+    assert "(root)" not in runtmp.last_result.out
+    assert (
+        "11.5% 27 Archaea;Euryarcheoata;unassigned;unassigned;novelFamily_I"
+        in runtmp.last_result.out
+    )
 
 
 def test_summarize_to_root_abund(runtmp, lca_db_format):
-    taxcsv = utils.get_test_data('lca-root/tax.csv')
-    input_sig1 = utils.get_test_data('lca-root/TARA_MED_MAG_00029.fa.sig')
-    input_sig2 = utils.get_test_data('lca-root/TOBG_MED-875.fna.gz.sig')
-    lca_db = runtmp.output(f'lca-root.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca-root/tax.csv")
+    input_sig1 = utils.get_test_data("lca-root/TARA_MED_MAG_00029.fa.sig")
+    input_sig2 = utils.get_test_data("lca-root/TOBG_MED-875.fna.gz.sig")
+    lca_db = runtmp.output(f"lca-root.lca.{lca_db_format}")
 
-    cmd = ['lca', 'index', taxcsv, lca_db, input_sig1, input_sig2,
-           '-F', lca_db_format]
+    cmd = ["lca", "index", taxcsv, lca_db, input_sig1, input_sig2, "-F", lca_db_format]
    runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -2115,26 +2548,29 @@
 
     assert os.path.exists(lca_db)
 
-    assert '2 identifiers used out of 2 distinct identifiers in spreadsheet.' in runtmp.last_result.err
+    assert (
+        "2 identifiers used out of 2 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
 
-    cmd = ['lca', 'summarize', '--db', lca_db, '--query', input_sig2]
+    cmd = ["lca", "summarize", "--db", lca_db, "--query", input_sig2]
    runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert '78.9% 101 Archaea' in runtmp.last_result.out
-    assert '21.1% 27 (root)' in runtmp.last_result.out
+    assert "78.9% 101 Archaea" in runtmp.last_result.out
+    assert "21.1% 27 (root)" in runtmp.last_result.out
 
 
 def test_summarize_unknown_hashes_abund(runtmp, lca_db_format):
-    taxcsv = utils.get_test_data('lca-root/tax.csv')
-    input_sig1 = utils.get_test_data('lca-root/TARA_MED_MAG_00029.fa.sig')
-    input_sig2 = utils.get_test_data('lca-root/TOBG_MED-875.fna.gz.sig')
-    lca_db = runtmp.output(f'lca-root.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca-root/tax.csv")
+    input_sig1 = utils.get_test_data("lca-root/TARA_MED_MAG_00029.fa.sig")
+    input_sig2 = utils.get_test_data("lca-root/TOBG_MED-875.fna.gz.sig")
+    lca_db = runtmp.output(f"lca-root.lca.{lca_db_format}")
 
-    cmd = ['lca', 'index', taxcsv, lca_db, input_sig2, '-F', lca_db_format]
+    cmd = ["lca", "index", taxcsv, lca_db, input_sig2, "-F", lca_db_format]
    runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -2143,61 +2579,73 @@
 
     assert os.path.exists(lca_db)
 
-    assert '1 identifiers used out of 2 distinct identifiers in spreadsheet.' in runtmp.last_result.err
+    assert (
+        "1 identifiers used out of 2 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
 
-    cmd = ['lca', 'summarize', '--db', lca_db, '--query', input_sig1]
+    cmd = ["lca", "summarize", "--db", lca_db, "--query", input_sig1]
    runtmp.sourmash(*cmd)
 
     print(cmd)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert '(root)' not in runtmp.last_result.out
-    assert '11.5% 27 Archaea;Euryarcheoata;unassigned;unassigned;novelFamily_I' in runtmp.last_result.out
+    assert "(root)" not in runtmp.last_result.out
+    assert (
+        "11.5% 27 Archaea;Euryarcheoata;unassigned;unassigned;novelFamily_I"
+        in runtmp.last_result.out
+    )
 
 
 @utils.in_thisdir
 def test_summarize_abund_hmp(c):
     # test lca summarize --with-abundance on some real data
-    queryfile = utils.get_test_data('hmp-sigs/G36354.sig.gz')
-    dbname = utils.get_test_data('hmp-sigs/G36354-matches.lca.json.gz')
+    queryfile = utils.get_test_data("hmp-sigs/G36354.sig.gz")
+    dbname = utils.get_test_data("hmp-sigs/G36354-matches.lca.json.gz")
 
-    c.run_sourmash('lca', 'summarize', '--db', dbname, '--query', queryfile)
+    c.run_sourmash("lca", "summarize", "--db", dbname, "--query", queryfile)
 
-    assert '32.1% 1080 p__Firmicutes;c__Bacilli;o__Lactobacillales' in c.last_result.out
+    assert (
+        "32.1% 1080 p__Firmicutes;c__Bacilli;o__Lactobacillales" in c.last_result.out
+    )
 
 
 @utils.in_thisdir
 def test_summarize_abund_fake_no_abund(c):
     # test lca summarize on some known/fake data; see docs for explanation.
-    queryfile = utils.get_test_data('fake-abund/query.sig.gz')
-    dbname = utils.get_test_data('fake-abund/matches.lca.json.gz')
+    queryfile = utils.get_test_data("fake-abund/query.sig.gz")
+    dbname = utils.get_test_data("fake-abund/matches.lca.json.gz")
 
-    c.run_sourmash('lca', 'summarize', '--db', dbname, '--query', queryfile,
-                   '--ignore-abundance')
+    c.run_sourmash(
+        "lca", "summarize", "--db", dbname, "--query", queryfile, "--ignore-abundance"
+    )
 
-    assert 'NOTE: discarding abundances in query, since --ignore-abundance' in c.last_result.err
-    assert '79.6% 550 Bacteria' in c.last_result.out
-    assert '20.4% 141 Archaea' in c.last_result.out
+    assert (
+        "NOTE: discarding abundances in query, since --ignore-abundance"
+        in c.last_result.err
+    )
+    assert "79.6% 550 Bacteria" in c.last_result.out
+    assert "20.4% 141 Archaea" in c.last_result.out
 
 
 @utils.in_thisdir
 def test_summarize_abund_fake_yes_abund(c):
     # test lca summarize abundance weighting on some known/fake data
-    queryfile = utils.get_test_data('fake-abund/query.sig.gz')
-    dbname = utils.get_test_data('fake-abund/matches.lca.json.gz')
+    queryfile = utils.get_test_data("fake-abund/query.sig.gz")
+    dbname = utils.get_test_data("fake-abund/matches.lca.json.gz")
 
-    c.run_sourmash('lca', 'summarize', '--db', dbname, '--query', queryfile)
+    c.run_sourmash("lca", "summarize", "--db", dbname, "--query", queryfile)
 
-    assert '43.2% 563 Bacteria' in c.last_result.out
-    assert '56.8% 740 Archaea' in c.last_result.out
+    assert "43.2% 563 Bacteria" in c.last_result.out
+    assert "56.8% 740 Archaea" in c.last_result.out
 
 
 def test_rankinfo_on_multi(runtmp):
-    db1 = utils.get_test_data('lca/dir1.lca.json')
-    db2 = utils.get_test_data('lca/dir2.lca.json')
+    db1 = utils.get_test_data("lca/dir1.lca.json")
+    db2 = utils.get_test_data("lca/dir2.lca.json")
 
-    cmd = ['lca', 'rankinfo', db1, db2]
+    cmd = ["lca", "rankinfo", db1, db2]
    runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -2205,22 +2653,22 @@
     print(runtmp.last_result.err)
 
     lines = runtmp.last_result.out.splitlines()
-    lines.remove('superkingdom: 0 (0.0%)')
-    lines.remove('phylum: 464 (12.8%)')
-    lines.remove('class: 533 (14.7%)')
-    lines.remove('order: 1050 (29.0%)')
-    lines.remove('family: 695 (19.2%)')
-    lines.remove('genus: 681 (18.8%)')
-    lines.remove('species: 200 (5.5%)')
-    lines.remove('strain: 0 (0.0%)')
+    lines.remove("superkingdom: 0 (0.0%)")
+    lines.remove("phylum: 464 (12.8%)")
+    lines.remove("class: 533 (14.7%)")
+    lines.remove("order: 1050 (29.0%)")
+    lines.remove("family: 695 (19.2%)")
+    lines.remove("genus: 681 (18.8%)")
+    lines.remove("species: 200 (5.5%)")
+    lines.remove("strain: 0 (0.0%)")
 
     assert not lines
 
 
 def test_rankinfo_on_single(runtmp):
-    db1 = utils.get_test_data('lca/both.lca.json')
+    db1 = utils.get_test_data("lca/both.lca.json")
 
-    cmd = ['lca', 'rankinfo', db1]
+    cmd = ["lca", "rankinfo", db1]
    runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -2228,46 +2676,55 @@
     print(runtmp.last_result.err)
 
     lines = runtmp.last_result.out.splitlines()
-    lines.remove('superkingdom: 0 (0.0%)')
-    lines.remove('phylum: 464 (12.8%)')
-    lines.remove('class: 533 (14.7%)')
-    lines.remove('order: 1050 (29.0%)')
-    lines.remove('family: 695 (19.2%)')
-    lines.remove('genus: 681 (18.8%)')
-    lines.remove('species: 200 (5.5%)')
-    lines.remove('strain: 0 (0.0%)')
+    lines.remove("superkingdom: 0 (0.0%)")
+    lines.remove("phylum: 464 (12.8%)")
+    lines.remove("class: 533 (14.7%)")
+    lines.remove("order: 1050 (29.0%)")
+    lines.remove("family: 695 (19.2%)")
+    lines.remove("genus: 681 (18.8%)")
+    lines.remove("species: 200 (5.5%)")
+    lines.remove("strain: 0 (0.0%)")
 
     assert not lines
 
 
 def test_rankinfo_no_tax(runtmp, lca_db_format):
     # note: TARA_PSW_MAG_00136 is _not_ in delmont-1.csv.
-    taxcsv = utils.get_test_data('lca/delmont-1.csv')
-    input_sig = utils.get_test_data('lca/TARA_PSW_MAG_00136.sig')
-    lca_db = runtmp.output(f'delmont-1.lca.{lca_db_format}')
+    taxcsv = utils.get_test_data("lca/delmont-1.csv")
+    input_sig = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig")
+    lca_db = runtmp.output(f"delmont-1.lca.{lca_db_format}")
 
-    cmd = ['lca', 'index', taxcsv, lca_db, input_sig, '-F', lca_db_format]
+    cmd = ["lca", "index", taxcsv, lca_db, input_sig, "-F", lca_db_format]
    runtmp.sourmash(*cmd)
 
-    print('cmd:', cmd)
-    print('out:', runtmp.last_result.out)
-    print('err:', runtmp.last_result.err)
+    print("cmd:", cmd)
+    print("out:", runtmp.last_result.out)
+    print("err:", runtmp.last_result.err)
 
     assert os.path.exists(lca_db)
 
-    assert "** assuming column 'MAGs' is identifiers in spreadsheet" in runtmp.last_result.err
-    assert "** assuming column 'Domain' is superkingdom in spreadsheet" in runtmp.last_result.err
-    assert '0 identifiers used out of 1 distinct identifiers in spreadsheet.' in runtmp.last_result.err
-
-    cmd = ['lca', 'rankinfo', lca_db]
+    assert (
+        "** assuming column 'MAGs' is identifiers in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "** assuming column 'Domain' is superkingdom in spreadsheet"
+        in runtmp.last_result.err
+    )
+    assert (
+        "0 identifiers used out of 1 distinct identifiers in spreadsheet."
+        in runtmp.last_result.err
+    )
+
+    cmd = ["lca", "rankinfo", lca_db]
    runtmp.sourmash(*cmd)
 
 
 def test_rankinfo_with_min(runtmp):
-    db1 = utils.get_test_data('lca/dir1.lca.json')
-    db2 = utils.get_test_data('lca/dir2.lca.json')
+    db1 = utils.get_test_data("lca/dir1.lca.json")
+    db2 = utils.get_test_data("lca/dir2.lca.json")
 
-    cmd = ['lca', 'rankinfo', db1, db2, '--minimum-num', '1']
+    cmd = ["lca", "rankinfo", db1, db2, "--minimum-num", "1"]
    runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -2275,23 +2732,23 @@
     print(runtmp.last_result.err)
 
     lines = runtmp.last_result.out.splitlines()
-    lines.remove('superkingdom: 0 (0.0%)')
-    lines.remove('phylum: 464 (12.8%)')
-    lines.remove('class: 533 (14.7%)')
-    lines.remove('order: 1050 (29.0%)')
-    lines.remove('family: 695 (19.2%)')
-    lines.remove('genus: 681 (18.8%)')
-    lines.remove('species: 200 (5.5%)')
-    lines.remove('strain: 0 (0.0%)')
+    lines.remove("superkingdom: 0 (0.0%)")
+    lines.remove("phylum: 464 (12.8%)")
+    lines.remove("class: 533 (14.7%)")
+    lines.remove("order: 1050 (29.0%)")
+    lines.remove("family: 695 (19.2%)")
+    lines.remove("genus: 681 (18.8%)")
+    lines.remove("species: 200 (5.5%)")
+    lines.remove("strain: 0 (0.0%)")
 
     assert not lines
 
 
 def test_rankinfo_with_min_2(runtmp):
-    db1 = utils.get_test_data('lca/dir1.lca.json')
-    db2 = utils.get_test_data('lca/dir2.lca.json')
+    db1 = utils.get_test_data("lca/dir1.lca.json")
+    db2 = utils.get_test_data("lca/dir2.lca.json")
 
-    cmd = ['lca', 'rankinfo', db1, db2, '--minimum-num', '2']
+    cmd = ["lca", "rankinfo", db1, db2, "--minimum-num", "2"]
    runtmp.sourmash(*cmd)
 
     print(cmd)
@@ -2302,126 +2759,186 @@
 
 
 def test_compare_csv(runtmp):
-    a = utils.get_test_data('lca/classify-by-both.csv')
-    b = utils.get_test_data('lca/tara-delmont-SuppTable3.csv')
+    a = utils.get_test_data("lca/classify-by-both.csv")
+    b = utils.get_test_data("lca/tara-delmont-SuppTable3.csv")
 
-    cmd = ['lca', 
'compare_csv', a, b, '-f'] + cmd = ["lca", "compare_csv", a, b, "-f"] runtmp.sourmash(*cmd) print(cmd) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'loaded 106 distinct lineages, 957 rows' in runtmp.last_result.err - assert 'missing 937 assignments in classify spreadsheet.' in runtmp.last_result.err - assert '20 total assignments, 0 differ between spreadsheets.' in runtmp.last_result.err + assert "loaded 106 distinct lineages, 957 rows" in runtmp.last_result.err + assert "missing 937 assignments in classify spreadsheet." in runtmp.last_result.err + assert ( + "20 total assignments, 0 differ between spreadsheets." in runtmp.last_result.err + ) def test_compare_csv_real(runtmp): - a = utils.get_test_data('lca/tully-genome-sigs.classify.csv') - b = utils.get_test_data('lca/tully-query.delmont-db.sigs.classify.csv') + a = utils.get_test_data("lca/tully-genome-sigs.classify.csv") + b = utils.get_test_data("lca/tully-query.delmont-db.sigs.classify.csv") - cmd = ['lca', 'compare_csv', a, b, '--start-column=3', '-f'] + cmd = ["lca", "compare_csv", a, b, "--start-column=3", "-f"] runtmp.sourmash(*cmd) print(cmd) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'loaded 87 distinct lineages, 2631 rows' in runtmp.last_result.err - assert 'missing 71 assignments in classify spreadsheet.' in runtmp.last_result.err - assert 'missing 1380 assignments in custom spreadsheet.' in runtmp.last_result.err - assert '(these will not be evaluated any further)' in runtmp.last_result.err - assert '987 total assignments, 889 differ between spreadsheets.' in runtmp.last_result.err - assert '296 are compatible (one lineage is ancestor of another.' in runtmp.last_result.err - assert '593 are incompatible (there is a disagreement in the trees).' in runtmp.last_result.err - assert '164 incompatible at rank superkingdom' in runtmp.last_result.err - assert '255 incompatible at rank phylum' in runtmp.last_result.err - assert '107 incompatible at rank class' in runtmp.last_result.err - assert '54 incompatible at rank order' in runtmp.last_result.err - assert '13 incompatible at rank family' in runtmp.last_result.err - assert '0 incompatible at rank genus' in runtmp.last_result.err - assert '0 incompatible at rank species' in runtmp.last_result.err + assert "loaded 87 distinct lineages, 2631 rows" in runtmp.last_result.err + assert "missing 71 assignments in classify spreadsheet." in runtmp.last_result.err + assert "missing 1380 assignments in custom spreadsheet." in runtmp.last_result.err + assert "(these will not be evaluated any further)" in runtmp.last_result.err + assert ( + "987 total assignments, 889 differ between spreadsheets." + in runtmp.last_result.err + ) + assert ( + "296 are compatible (one lineage is ancestor of another." + in runtmp.last_result.err + ) + assert ( + "593 are incompatible (there is a disagreement in the trees)." 
+ in runtmp.last_result.err + ) + assert "164 incompatible at rank superkingdom" in runtmp.last_result.err + assert "255 incompatible at rank phylum" in runtmp.last_result.err + assert "107 incompatible at rank class" in runtmp.last_result.err + assert "54 incompatible at rank order" in runtmp.last_result.err + assert "13 incompatible at rank family" in runtmp.last_result.err + assert "0 incompatible at rank genus" in runtmp.last_result.err + assert "0 incompatible at rank species" in runtmp.last_result.err def test_incompat_lca_db_ksize_2_fail(runtmp, lca_db_format): # test on gather - create a database with ksize of 25 => fail # because of incompatibility. c = runtmp - testdata1 = utils.get_test_data('lca/TARA_ASE_MAG_00031.fa.gz') - c.run_sourmash('sketch', 'dna', '-p', 'k=25,scaled=1000', testdata1, - '-o', 'test_db.sig') + testdata1 = utils.get_test_data("lca/TARA_ASE_MAG_00031.fa.gz") + c.run_sourmash( + "sketch", "dna", "-p", "k=25,scaled=1000", testdata1, "-o", "test_db.sig" + ) print(c) - c.run_sourmash('lca', 'index', utils.get_test_data('lca/delmont-1.csv',), - f'test.lca.{lca_db_format}', 'test_db.sig', - '-k', '25', '--scaled', '10000', - '-F', lca_db_format) + c.run_sourmash( + "lca", + "index", + utils.get_test_data( + "lca/delmont-1.csv", + ), + f"test.lca.{lca_db_format}", + "test_db.sig", + "-k", + "25", + "--scaled", + "10000", + "-F", + lca_db_format, + ) print(c) # this should fail: the LCA database has ksize 25, and the query sig has # no compatible ksizes. - with pytest.raises(SourmashCommandFailed) as e: - c.run_sourmash('gather', utils.get_test_data('lca/TARA_ASE_MAG_00031.sig'), f'test.lca.{lca_db_format}') + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "gather", + utils.get_test_data("lca/TARA_ASE_MAG_00031.sig"), + f"test.lca.{lca_db_format}", + ) err = c.last_result.err print(err) - if lca_db_format == 'sql': + if lca_db_format == "sql": assert "no compatible signatures found in 'test.lca.sql'" in err else: assert "ERROR: cannot use 'test.lca.json' for this query." in err - assert "ksize on this database is 25; this is different from requested ksize of 31" + assert ( + "ksize on this database is 25; this is different from requested ksize of 31" + ) def test_incompat_lca_db_ksize_2_nofail(runtmp, lca_db_format): # test on gather - create a database with ksize of 25, no fail # because of --no-fail-on-empty-databases c = runtmp - testdata1 = utils.get_test_data('lca/TARA_ASE_MAG_00031.fa.gz') - c.run_sourmash('sketch', 'dna', '-p', 'k=25,scaled=1000', testdata1, - '-o', 'test_db.sig') + testdata1 = utils.get_test_data("lca/TARA_ASE_MAG_00031.fa.gz") + c.run_sourmash( + "sketch", "dna", "-p", "k=25,scaled=1000", testdata1, "-o", "test_db.sig" + ) print(c) - c.run_sourmash('lca', 'index', utils.get_test_data('lca/delmont-1.csv',), - f'test.lca.{lca_db_format}', 'test_db.sig', - '-k', '25', '--scaled', '10000', - '-F', lca_db_format) + c.run_sourmash( + "lca", + "index", + utils.get_test_data( + "lca/delmont-1.csv", + ), + f"test.lca.{lca_db_format}", + "test_db.sig", + "-k", + "25", + "--scaled", + "10000", + "-F", + lca_db_format, + ) print(c) # this should not fail despite mismatched ksize, b/c of --no-fail flag. 
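Note: the reformatting makes an existing test smell easier to spot here. The final `assert` in `test_incompat_lca_db_ksize_2_fail` above (and its twin at the end of `test_incompat_lca_db_ksize_2_nofail` below) is a bare string literal, so it is always truthy and never inspects `err`. Assuming the message really is emitted to stderr (not verified in this PR), the check these tests presumably meant is:

```python
assert (
    "ksize on this database is 25; this is different from requested ksize of 31"
    in err
)
```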
- c.run_sourmash('gather', utils.get_test_data('lca/TARA_ASE_MAG_00031.sig'), f'test.lca.{lca_db_format}', '--no-fail-on-empty-database') + c.run_sourmash( + "gather", + utils.get_test_data("lca/TARA_ASE_MAG_00031.sig"), + f"test.lca.{lca_db_format}", + "--no-fail-on-empty-database", + ) err = c.last_result.err print(err) - if lca_db_format == 'sql': + if lca_db_format == "sql": assert "no compatible signatures found in 'test.lca.sql'" in err else: assert "ERROR: cannot use 'test.lca.json' for this query." in err - assert "ksize on this database is 25; this is different from requested ksize of 31" + assert ( + "ksize on this database is 25; this is different from requested ksize of 31" + ) def test_lca_index_empty(runtmp, lca_db_format): c = runtmp # test lca index with an empty taxonomy CSV, followed by a load & gather. - sig2file = utils.get_test_data('2.fa.sig') - sig47file = utils.get_test_data('47.fa.sig') - sig63file = utils.get_test_data('63.fa.sig') + sig2file = utils.get_test_data("2.fa.sig") + sig47file = utils.get_test_data("47.fa.sig") + sig63file = utils.get_test_data("63.fa.sig") sig63 = load_one_signature(sig63file, ksize=31) # create an empty spreadsheet - with open(c.output('empty.csv'), 'wt') as fp: - fp.write('accession,superkingdom,phylum,class,order,family,genus,species,strain') + with open(c.output("empty.csv"), "w") as fp: + fp.write( + "accession,superkingdom,phylum,class,order,family,genus,species,strain" + ) # index! - c.run_sourmash('lca', 'index', 'empty.csv', 'xxx', - sig2file, sig47file, sig63file, '--scaled', '1000', - '-F', lca_db_format) + c.run_sourmash( + "lca", + "index", + "empty.csv", + "xxx", + sig2file, + sig47file, + sig63file, + "--scaled", + "1000", + "-F", + lca_db_format, + ) # can we load and search? - lca_db_filename = c.output(f'xxx.lca.{lca_db_format}') + lca_db_filename = c.output(f"xxx.lca.{lca_db_format}") db, ksize, scaled = lca_utils.load_single_database(lca_db_filename) result = db.best_containment(sig63) @@ -2434,9 +2951,9 @@ def test_lca_index_empty(runtmp, lca_db_format): def test_lca_gather_threshold_1(): # test gather() method, in some detail; see same tests for sbt. - sig2file = utils.get_test_data('2.fa.sig') - sig47file = utils.get_test_data('47.fa.sig') - sig63file = utils.get_test_data('63.fa.sig') + sig2file = utils.get_test_data("2.fa.sig") + sig47file = utils.get_test_data("47.fa.sig") + sig63file = utils.get_test_data("63.fa.sig") sig2 = load_one_signature(sig2file, ksize=31) sig47 = load_one_signature(sig47file, ksize=31) @@ -2468,7 +2985,7 @@ def test_lca_gather_threshold_1(): containment, match_sig, name = result assert containment == 1.0 assert match_sig.minhash == sig2.minhash - assert name == None + assert name is None # check with a threshold -> should be no results. with pytest.raises(ValueError): @@ -2485,7 +3002,7 @@ def test_lca_gather_threshold_1(): containment, match_sig, name = result assert containment == 1.0 assert match_sig.minhash == sig2.minhash - assert name == None + assert name is None # check with a too-high threshold -> should be no results. with pytest.raises(ValueError): @@ -2494,9 +3011,9 @@ def test_lca_gather_threshold_1(): def test_lca_gather_threshold_5(): # test gather() method, in some detail; see same tests for sbt. 
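For readers skimming the threshold tests in this hunk: `best_containment()` returns a single `(containment, match_signature, name)` result, and `threshold_bp` sets the minimum estimated overlap in base pairs — roughly `threshold_bp / scaled` hashes must be shared. A self-contained sketch of the pattern being exercised (hash values and the `"tiny"` name are made up; with `scaled=1000`, `threshold_bp=5000` asks for ~5 overlapping hashes):

```python
from sourmash import MinHash, SourmashSignature
from sourmash.lca.lca_db import LCA_Database

# five hashes in a scaled MinHash => ~5000 bp of estimated overlap
mh = MinHash(n=0, ksize=31, scaled=1000)
for hashval in (10, 20, 30, 40, 50):
    mh.add_hash(hashval)

db = LCA_Database(ksize=31, scaled=1000)
db.insert(SourmashSignature(mh, name="tiny"))

# the query shares all five hashes, so a 5000 bp threshold is meetable
containment, match_sig, name = db.best_containment(
    SourmashSignature(mh), threshold_bp=5000
)
assert containment == 1.0
```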
- sig2file = utils.get_test_data('2.fa.sig') - sig47file = utils.get_test_data('47.fa.sig') - sig63file = utils.get_test_data('63.fa.sig') + sig2file = utils.get_test_data("2.fa.sig") + sig47file = utils.get_test_data("47.fa.sig") + sig63file = utils.get_test_data("63.fa.sig") sig2 = load_one_signature(sig2file, ksize=31) sig47 = load_one_signature(sig47file, ksize=31) @@ -2528,7 +3045,7 @@ def test_lca_gather_threshold_5(): containment, match_sig, name = result assert containment == 1.0 assert match_sig.minhash == sig2.minhash - assert name == None + assert name is None # now, check with a threshold_bp that should be meet-able. result = db.best_containment(SourmashSignature(new_mh), threshold_bp=5000) @@ -2536,13 +3053,13 @@ def test_lca_gather_threshold_5(): containment, match_sig, name = result assert containment == 1.0 assert match_sig.minhash == sig2.minhash - assert name == None + assert name is None def test_gather_multiple_return(): - sig2file = utils.get_test_data('2.fa.sig') - sig47file = utils.get_test_data('47.fa.sig') - sig63file = utils.get_test_data('63.fa.sig') + sig2file = utils.get_test_data("2.fa.sig") + sig47file = utils.get_test_data("47.fa.sig") + sig63file = utils.get_test_data("63.fa.sig") sig2 = load_one_signature(sig2file, ksize=31) sig47 = load_one_signature(sig47file, ksize=31) @@ -2564,18 +3081,22 @@ def test_gather_multiple_return(): def test_lca_db_protein_build(): # test programmatic creation of LCA database with protein sigs in it - sigfile1 = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') + sigfile1 = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) - db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype='protein') + db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="protein") assert db.insert(sig1) assert db.insert(sig2) # check reconstruction -- - mh_list = [ x.minhash for x in db.signatures() ] + mh_list = [x.minhash for x in db.signatures()] assert len(mh_list) == 2 assert sig1.minhash in mh_list assert sig2.minhash in mh_list @@ -2591,28 +3112,32 @@ def test_lca_db_protein_build(): @utils.in_tempdir def test_lca_db_protein_save_load(c): # test save/load of programmatically created db with protein sigs in it - sigfile1 = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') + sigfile1 = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) - db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype='protein') + db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="protein") assert db.insert(sig1) assert db.insert(sig2) - db.save(c.output('xxx.lca.json')) + db.save(c.output("xxx.lca.json")) del db - x = sourmash.lca.lca_db.load_single_database(c.output('xxx.lca.json')) + x = sourmash.lca.lca_db.load_single_database(c.output("xxx.lca.json")) db2 = x[0] - assert db2.moltype == 'protein' + assert db2.moltype == "protein" # 
check reconstruction -- - mh_list = [ x.minhash for x in db2.signatures() ] + mh_list = [x.minhash for x in db2.signatures()] assert len(mh_list) == 2 - print('XXX', mh_list[0].ksize) - print('YYY', sig1.minhash.ksize) + print("XXX", mh_list[0].ksize) + print("YYY", sig1.minhash.ksize) assert sig1.minhash in mh_list assert sig2.minhash in mh_list @@ -2628,26 +3153,45 @@ def test_lca_db_protein_command_index(runtmp, lca_db_format): # test command-line creation of LCA database with protein sigs c = runtmp - sigfile1 = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') - lineages = utils.get_test_data('prot/gtdb-subset-lineages.csv') - - db_out = c.output(f'protein.lca.{lca_db_format}') - - c.run_sourmash('lca', 'index', lineages, db_out, sigfile1, sigfile2, - '-C', '2', '--split-identifiers', '--require-taxonomy', - '--scaled', '100', '-k', '19', '--protein', - '-F', lca_db_format) + sigfile1 = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) + lineages = utils.get_test_data("prot/gtdb-subset-lineages.csv") + + db_out = c.output(f"protein.lca.{lca_db_format}") + + c.run_sourmash( + "lca", + "index", + lineages, + db_out, + sigfile1, + sigfile2, + "-C", + "2", + "--split-identifiers", + "--require-taxonomy", + "--scaled", + "100", + "-k", + "19", + "--protein", + "-F", + lca_db_format, + ) x = sourmash.lca.lca_db.load_single_database(db_out) db2 = x[0] - assert db2.moltype == 'protein' + assert db2.moltype == "protein" sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) # check reconstruction -- - mh_list = [ x.minhash for x in db2.signatures() ] + mh_list = [x.minhash for x in db2.signatures()] assert len(mh_list) == 2 assert sig1.minhash in mh_list assert sig2.minhash in mh_list @@ -2664,31 +3208,37 @@ def test_lca_db_protein_command_index(runtmp, lca_db_format): def test_lca_db_protein_command_search(c): # test command-line search/gather of LCA database with protein sigs # (LCA database created as above) - sigfile1 = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/protein.lca.json.gz') + sigfile1 = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/protein.lca.json.gz") - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') - assert '2 matches' in c.last_result.out + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") + assert "2 matches" in c.last_result.out - c.run_sourmash('gather', sigfile1, db_out) - assert 'found 1 matches total' in c.last_result.out - assert 'the recovered matches hit 100.0% of the query' in c.last_result.out + c.run_sourmash("gather", sigfile1, db_out) + assert "found 1 matches total" in c.last_result.out + assert "the recovered matches hit 100.0% of the query" in c.last_result.out def test_lca_db_hp_build(): # test programmatic creation of LCA database with hp sigs in it - sigfile1 = utils.get_test_data('prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/hp/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') + sigfile1 = utils.get_test_data( + "prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + 
"prot/hp/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) - db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype='hp') + db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="hp") assert db.insert(sig1) assert db.insert(sig2) # check reconstruction -- - mh_list = [ x.minhash for x in db.signatures() ] + mh_list = [x.minhash for x in db.signatures()] assert len(mh_list) == 2 assert sig1.minhash in mh_list assert sig2.minhash in mh_list @@ -2704,25 +3254,29 @@ def test_lca_db_hp_build(): @utils.in_tempdir def test_lca_db_hp_save_load(c): # test save/load of programmatically created db with hp sigs in it - sigfile1 = utils.get_test_data('prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/hp/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') + sigfile1 = utils.get_test_data( + "prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/hp/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) - db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype='hp') + db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="hp") assert db.insert(sig1) assert db.insert(sig2) - db.save(c.output('xxx.lca.json')) + db.save(c.output("xxx.lca.json")) del db - x = sourmash.lca.lca_db.load_single_database(c.output('xxx.lca.json')) + x = sourmash.lca.lca_db.load_single_database(c.output("xxx.lca.json")) db2 = x[0] - assert db2.moltype == 'hp' + assert db2.moltype == "hp" # check reconstruction -- - mh_list = [ x.minhash for x in db2.signatures() ] + mh_list = [x.minhash for x in db2.signatures()] assert len(mh_list) == 2 assert sig1.minhash in mh_list assert sig2.minhash in mh_list @@ -2739,26 +3293,45 @@ def test_lca_db_hp_command_index(runtmp, lca_db_format): # test command-line creation of LCA database with hp sigs c = runtmp - sigfile1 = utils.get_test_data('prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/hp/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') - lineages = utils.get_test_data('prot/gtdb-subset-lineages.csv') - - db_out = c.output(f'hp.lca.{lca_db_format}') - - c.run_sourmash('lca', 'index', lineages, db_out, sigfile1, sigfile2, - '-C', '2', '--split-identifiers', '--require-taxonomy', - '--scaled', '100', '-k', '19', '--hp', - '-F', lca_db_format) + sigfile1 = utils.get_test_data( + "prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/hp/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) + lineages = utils.get_test_data("prot/gtdb-subset-lineages.csv") + + db_out = c.output(f"hp.lca.{lca_db_format}") + + c.run_sourmash( + "lca", + "index", + lineages, + db_out, + sigfile1, + sigfile2, + "-C", + "2", + "--split-identifiers", + "--require-taxonomy", + "--scaled", + "100", + "-k", + "19", + "--hp", + "-F", + lca_db_format, + ) x = sourmash.lca.lca_db.load_single_database(db_out) db2 = x[0] - assert db2.moltype == 'hp' + assert db2.moltype == "hp" sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) # check reconstruction -- - mh_list = [ x.minhash for x in db2.signatures() ] + mh_list = [x.minhash for x in db2.signatures()] assert len(mh_list) == 2 assert sig1.minhash in mh_list assert sig2.minhash in mh_list @@ -2775,31 +3348,37 @@ def test_lca_db_hp_command_index(runtmp, 
lca_db_format): def test_lca_db_hp_command_search(c): # test command-line search/gather of LCA database with hp sigs # (LCA database created as above) - sigfile1 = utils.get_test_data('prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/hp.lca.json.gz') + sigfile1 = utils.get_test_data( + "prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/hp.lca.json.gz") - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') - assert '2 matches' in c.last_result.out + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") + assert "2 matches" in c.last_result.out - c.run_sourmash('gather', sigfile1, db_out, '--threshold', '0.0') - assert 'found 1 matches total' in c.last_result.out - assert 'the recovered matches hit 100.0% of the query' in c.last_result.out + c.run_sourmash("gather", sigfile1, db_out, "--threshold", "0.0") + assert "found 1 matches total" in c.last_result.out + assert "the recovered matches hit 100.0% of the query" in c.last_result.out def test_lca_db_dayhoff_build(): # test programmatic creation of LCA database with dayhoff sigs in it - sigfile1 = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/dayhoff/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') + sigfile1 = utils.get_test_data( + "prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/dayhoff/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) - db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype='dayhoff') + db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="dayhoff") assert db.insert(sig1) assert db.insert(sig2) # check reconstruction -- - mh_list = [ x.minhash for x in db.signatures() ] + mh_list = [x.minhash for x in db.signatures()] assert len(mh_list) == 2 assert sig1.minhash in mh_list assert sig2.minhash in mh_list @@ -2815,25 +3394,29 @@ def test_lca_db_dayhoff_build(): @utils.in_tempdir def test_lca_db_dayhoff_save_load(c): # test save/load of programmatically created db with dayhoff sigs in it - sigfile1 = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/dayhoff/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') + sigfile1 = utils.get_test_data( + "prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/dayhoff/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) - db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype='dayhoff') + db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="dayhoff") assert db.insert(sig1) assert db.insert(sig2) - db.save(c.output('xxx.lca.json')) + db.save(c.output("xxx.lca.json")) del db - x = sourmash.lca.lca_db.load_single_database(c.output('xxx.lca.json')) + x = sourmash.lca.lca_db.load_single_database(c.output("xxx.lca.json")) db2 = x[0] - assert db2.moltype == 'dayhoff' + assert db2.moltype == "dayhoff" # check reconstruction -- - mh_list = [ x.minhash for x in db2.signatures() ] + mh_list = [x.minhash for x in db2.signatures()] assert len(mh_list) == 2 assert sig1.minhash in mh_list assert sig2.minhash in mh_list @@ -2850,26 +3433,45 @@ def test_lca_db_dayhoff_command_index(runtmp, lca_db_format): # test command-line 
creation of LCA database with dayhoff sigs c = runtmp - sigfile1 = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/dayhoff/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') - lineages = utils.get_test_data('prot/gtdb-subset-lineages.csv') - - db_out = c.output(f'dayhoff.lca.{lca_db_format}') - - c.run_sourmash('lca', 'index', lineages, db_out, sigfile1, sigfile2, - '-C', '2', '--split-identifiers', '--require-taxonomy', - '--scaled', '100', '-k', '19', '--dayhoff', - '-F', lca_db_format) + sigfile1 = utils.get_test_data( + "prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/dayhoff/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) + lineages = utils.get_test_data("prot/gtdb-subset-lineages.csv") + + db_out = c.output(f"dayhoff.lca.{lca_db_format}") + + c.run_sourmash( + "lca", + "index", + lineages, + db_out, + sigfile1, + sigfile2, + "-C", + "2", + "--split-identifiers", + "--require-taxonomy", + "--scaled", + "100", + "-k", + "19", + "--dayhoff", + "-F", + lca_db_format, + ) x = sourmash.lca.lca_db.load_single_database(db_out) db2 = x[0] - assert db2.moltype == 'dayhoff' + assert db2.moltype == "dayhoff" sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) # check reconstruction -- - mh_list = [ x.minhash for x in db2.signatures() ] + mh_list = [x.minhash for x in db2.signatures()] assert len(mh_list) == 2 assert sig1.minhash in mh_list assert sig2.minhash in mh_list @@ -2886,29 +3488,43 @@ def test_lca_db_dayhoff_command_index(runtmp, lca_db_format): def test_lca_db_dayhoff_command_search(c): # test command-line search/gather of LCA database with dayhoff sigs # (LCA database created as above) - sigfile1 = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/dayhoff.lca.json.gz') + sigfile1 = utils.get_test_data( + "prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/dayhoff.lca.json.gz") - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') - assert '2 matches' in c.last_result.out + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") + assert "2 matches" in c.last_result.out - c.run_sourmash('gather', sigfile1, db_out, '--threshold', '0.0') - assert 'found 1 matches total' in c.last_result.out - assert 'the recovered matches hit 100.0% of the query' in c.last_result.out + c.run_sourmash("gather", sigfile1, db_out, "--threshold", "0.0") + assert "found 1 matches total" in c.last_result.out + assert "the recovered matches hit 100.0% of the query" in c.last_result.out def test_lca_index_with_picklist(runtmp, lca_db_format): - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - outdb = runtmp.output(f'gcf.lca.{lca_db_format}') - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + outdb = runtmp.output(f"gcf.lca.{lca_db_format}") + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") # create an empty spreadsheet - with open(runtmp.output('empty.csv'), 'wt') as fp: - fp.write('accession,superkingdom,phylum,class,order,family,genus,species,strain') - - runtmp.sourmash('lca', 'index', 'empty.csv', outdb, *gcf_sigs, - '-k', '21', '--picklist', f"{picklist}:md5:md5", - '-F', lca_db_format) + with open(runtmp.output("empty.csv"), "w") as fp: + fp.write( + 
"accession,superkingdom,phylum,class,order,family,genus,species,strain" + ) + + runtmp.sourmash( + "lca", + "index", + "empty.csv", + outdb, + *gcf_sigs, + "-k", + "21", + "--picklist", + f"{picklist}:md5:md5", + "-F", + lca_db_format, + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -2923,21 +3539,33 @@ def test_lca_index_with_picklist(runtmp, lca_db_format): siglist = list(sourmash.load_file_as_signatures(outdb)) assert len(siglist) == 3 for ss in siglist: - assert 'Thermotoga' in ss.name + assert "Thermotoga" in ss.name def test_lca_index_with_picklist_exclude(runtmp, lca_db_format): - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - outdb = runtmp.output(f'gcf.lca.{lca_db_format}') - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + outdb = runtmp.output(f"gcf.lca.{lca_db_format}") + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") # create an empty spreadsheet - with open(runtmp.output('empty.csv'), 'wt') as fp: - fp.write('accession,superkingdom,phylum,class,order,family,genus,species,strain') - - runtmp.sourmash('lca', 'index', 'empty.csv', outdb, *gcf_sigs, - '-k', '21', '--picklist', f"{picklist}:md5:md5:exclude", - '-F', lca_db_format) + with open(runtmp.output("empty.csv"), "w") as fp: + fp.write( + "accession,superkingdom,phylum,class,order,family,genus,species,strain" + ) + + runtmp.sourmash( + "lca", + "index", + "empty.csv", + outdb, + *gcf_sigs, + "-k", + "21", + "--picklist", + f"{picklist}:md5:md5:exclude", + "-F", + lca_db_format, + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -2948,21 +3576,24 @@ def test_lca_index_with_picklist_exclude(runtmp, lca_db_format): siglist = list(sourmash.load_file_as_signatures(outdb)) assert len(siglist) == 9 for ss in siglist: - assert 'Thermotoga' not in ss.name + assert "Thermotoga" not in ss.name def test_lca_index_select_with_picklist(runtmp, lca_db_format): # check what happens with picklists after index - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - outdb = runtmp.output(f'gcf.lca.{lca_db_format}') - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + outdb = runtmp.output(f"gcf.lca.{lca_db_format}") + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") # create an empty spreadsheet - with open(runtmp.output('empty.csv'), 'wt') as fp: - fp.write('accession,superkingdom,phylum,class,order,family,genus,species,strain') + with open(runtmp.output("empty.csv"), "w") as fp: + fp.write( + "accession,superkingdom,phylum,class,order,family,genus,species,strain" + ) - runtmp.sourmash('lca', 'index', 'empty.csv', outdb, *gcf_sigs, - '-k', '21', '-F', lca_db_format) + runtmp.sourmash( + "lca", "index", "empty.csv", outdb, *gcf_sigs, "-k", "21", "-F", lca_db_format + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -2979,21 +3610,24 @@ def test_lca_index_select_with_picklist(runtmp, lca_db_format): siglist = list(idx.signatures()) assert len(siglist) == 3 for ss in siglist: - assert 'Thermotoga' in ss.name + assert "Thermotoga" in ss.name def test_lca_index_select_with_picklist_exclude(runtmp, lca_db_format): # check what happens with picklists after index - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - outdb = runtmp.output(f'gcf.lca.{lca_db_format}') - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') + gcf_sigs = 
glob.glob(utils.get_test_data("gather/GCF*.sig")) + outdb = runtmp.output(f"gcf.lca.{lca_db_format}") + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") # create an empty spreadsheet - with open(runtmp.output('empty.csv'), 'wt') as fp: - fp.write('accession,superkingdom,phylum,class,order,family,genus,species,strain') + with open(runtmp.output("empty.csv"), "w") as fp: + fp.write( + "accession,superkingdom,phylum,class,order,family,genus,species,strain" + ) - runtmp.sourmash('lca', 'index', 'empty.csv', outdb, *gcf_sigs, - '-k', '21', '-F', lca_db_format) + runtmp.sourmash( + "lca", "index", "empty.csv", outdb, *gcf_sigs, "-k", "21", "-F", lca_db_format + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -3009,7 +3643,7 @@ def test_lca_index_select_with_picklist_exclude(runtmp, lca_db_format): siglist = list(idx.signatures()) assert len(siglist) == 9 for ss in siglist: - assert 'Thermotoga' not in ss.name + assert "Thermotoga" not in ss.name def test_lca_jaccard_ordering(): @@ -3028,10 +3662,10 @@ def test_lca_jaccard_ordering(): def _intersect(x, y): return x.intersection_and_union_size(y)[0] - print('a intersect b:', _intersect(a, b)) - print('a intersect c:', _intersect(a, c)) - print('a jaccard b:', a.jaccard(b)) - print('a jaccard c:', a.jaccard(c)) + print("a intersect b:", _intersect(a, b)) + print("a intersect c:", _intersect(a, c)) + print("a jaccard b:", a.jaccard(b)) + print("a jaccard c:", a.jaccard(c)) assert _intersect(a, b) > _intersect(a, c) assert a.jaccard(b) < a.jaccard(c) @@ -3040,9 +3674,9 @@ def _intersect(x, y): assert a.jaccard(c) > 0.15 # now - make signatures, try out :) - ss_a = sourmash.SourmashSignature(a, name='A') - ss_b = sourmash.SourmashSignature(b, name='B') - ss_c = sourmash.SourmashSignature(c, name='C') + ss_a = sourmash.SourmashSignature(a, name="A") + ss_b = sourmash.SourmashSignature(b, name="B") + ss_c = sourmash.SourmashSignature(c, name="C") db = sourmash.lca.LCA_Database(ksize=31, scaled=2) db.insert(ss_a) @@ -3060,17 +3694,21 @@ def _intersect(x, y): def test_lca_db_protein_save_twice(runtmp, lca_db_format): # test save twice - sigfile1 = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') + sigfile1 = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) - db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype='protein') + db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="protein") assert db.insert(sig1) assert db.insert(sig2) - db.save(runtmp.output('xxx'), format=lca_db_format) + db.save(runtmp.output("xxx"), format=lca_db_format) with pytest.raises(ValueError): - db.save(runtmp.output('xxx'), format=lca_db_format) + db.save(runtmp.output("xxx"), format=lca_db_format) diff --git a/tests/test_lca_db_protocol.py b/tests/test_lca_db_protocol.py index a3fc57b085..eb2f76fe07 100644 --- a/tests/test_lca_db_protocol.py +++ b/tests/test_lca_db_protocol.py @@ -7,26 +7,30 @@ import sourmash from sourmash.tax.tax_utils import MultiLineageDB -from sourmash.lca.lca_db import (LCA_Database, load_single_database) +from sourmash.lca.lca_db import LCA_Database, load_single_database def build_inmem_lca_db(runtmp): # test in-memory LCA_Database - sigfile1 = 
utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') + sigfile1 = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) ss1 = sourmash.load_one_signature(sigfile1) ss2 = sourmash.load_one_signature(sigfile2) - lineages_file = utils.get_test_data('prot/gtdb-subset-lineages.csv') + lineages_file = utils.get_test_data("prot/gtdb-subset-lineages.csv") lineages = MultiLineageDB.load([lineages_file]) - db = LCA_Database(ksize=19, scaled=100, moltype='protein') + db = LCA_Database(ksize=19, scaled=100, moltype="protein") - ident1 = ss1.name.split(' ')[0].split('.')[0] + ident1 = ss1.name.split(" ")[0].split(".")[0] assert lineages[ident1] db.insert(ss1, ident=ident1, lineage=lineages[ident1]) - ident2 = ss2.name.split(' ')[0].split('.')[0] + ident2 = ss2.name.split(" ")[0].split(".")[0] assert lineages[ident2] db.insert(ss2, ident=ident2, lineage=lineages[ident2]) @@ -36,9 +40,9 @@ def build_inmem_lca_db(runtmp): def build_json_lca_db(runtmp): # test saved/loaded JSON database db = build_inmem_lca_db(runtmp) - db_out = runtmp.output('protein.lca.json') + db_out = runtmp.output("protein.lca.json") - db.save(db_out, format='json') + db.save(db_out, format="json") x = load_single_database(db_out) db_load = x[0] @@ -49,9 +53,9 @@ def build_json_lca_db(runtmp): def build_sql_lca_db(runtmp): # test saved/loaded SQL database db = build_inmem_lca_db(runtmp) - db_out = runtmp.output('protein.lca.json') + db_out = runtmp.output("protein.lca.json") - db.save(db_out, format='sql') + db.save(db_out, format="sql") x = load_single_database(db_out) db_load = x[0] @@ -59,9 +63,7 @@ def build_sql_lca_db(runtmp): return db_load -@pytest.fixture(params=[build_inmem_lca_db, - build_json_lca_db, - build_sql_lca_db]) +@pytest.fixture(params=[build_inmem_lca_db, build_json_lca_db, build_sql_lca_db]) def lca_db_obj(request, runtmp): build_fn = request.param @@ -77,16 +79,18 @@ def test_get_lineage_assignments(lca_db_obj): x = [] for tup in lineage: - if tup[0] != 'strain' or tup[1]: # ignore empty strain + if tup[0] != "strain" or tup[1]: # ignore empty strain x.append((tup[0], tup[1])) - assert x == [('superkingdom', 'd__Archaea'), - ('phylum', 'p__Crenarchaeota'), - ('class', 'c__Bathyarchaeia'), - ('order', 'o__B26-1'), - ('family', 'f__B26-1'), - ('genus', 'g__B26-1'), - ('species', 's__B26-1 sp001593925'),] + assert x == [ + ("superkingdom", "d__Archaea"), + ("phylum", "p__Crenarchaeota"), + ("class", "c__Bathyarchaeia"), + ("order", "o__B26-1"), + ("family", "f__B26-1"), + ("genus", "g__B26-1"), + ("species", "s__B26-1 sp001593925"), + ] def test_hashvals(lca_db_obj): @@ -102,7 +106,7 @@ def test_get_identifiers_for_hashval(lca_db_obj): assert len(idents) == 1 ident = idents[0] - assert ident == 'GCA_001593925' + assert ident == "GCA_001593925" def test_get_identifiers_for_hashval_2(lca_db_obj): @@ -111,15 +115,15 @@ def test_get_identifiers_for_hashval_2(lca_db_obj): for hashval in lca_db_obj.hashvals: idents = lca_db_obj.get_identifiers_for_hashval(hashval) - #idents = list(idents) + # idents = list(idents) all_idents.update(idents) all_idents = list(all_idents) print(all_idents) assert len(all_idents) == 2 - assert 'GCA_001593925' in all_idents - assert 'GCA_001593935' in all_idents + assert "GCA_001593925" in all_idents + assert "GCA_001593935" in 
all_idents def test_downsample_scaled(lca_db_obj): diff --git a/tests/test_lca_functions.py b/tests/test_lca_functions.py index 0674df80df..9add0df47f 100644 --- a/tests/test_lca_functions.py +++ b/tests/test_lca_functions.py @@ -4,14 +4,21 @@ import pytest from sourmash.lca import lca_utils -from sourmash.lca.lca_utils import (LineagePair, build_tree, find_lca, - taxlist, count_lca_for_assignments, - zip_lineage, display_lineage, - make_lineage, is_lineage_match, - pop_to_rank) - - -class FakeLCA_Database(object): +from sourmash.lca.lca_utils import ( + LineagePair, + build_tree, + find_lca, + taxlist, + count_lca_for_assignments, + zip_lineage, + display_lineage, + make_lineage, + is_lineage_match, + pop_to_rank, +) + + +class FakeLCA_Database: def __init__(self): self._assignments = {} @@ -26,139 +33,194 @@ def get_lineage_assignments(self, hashval): def test_taxlist_1(): - assert list(taxlist()) == ['superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'strain'] + assert list(taxlist()) == [ + "superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + "strain", + ] def test_taxlist_2(): - assert list(taxlist(include_strain=False)) == ['superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'] + assert list(taxlist(include_strain=False)) == [ + "superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + ] def test_zip_lineage_1(): - x = [ LineagePair('superkingdom', 'a'), LineagePair('phylum', 'b') ] - assert zip_lineage(x) == ['a', 'b', '', '', '', '', '', ''] + x = [LineagePair("superkingdom", "a"), LineagePair("phylum", "b")] + assert zip_lineage(x) == ["a", "b", "", "", "", "", "", ""] def test_zip_lineage_2(): - x = [ LineagePair('superkingdom', 'a'), LineagePair('phylum', 'b') ] - assert zip_lineage(x, truncate_empty=True) == ['a', 'b'] + x = [LineagePair("superkingdom", "a"), LineagePair("phylum", "b")] + assert zip_lineage(x, truncate_empty=True) == ["a", "b"] def test_zip_lineage_3(): - x = [ LineagePair('superkingdom', 'a'), LineagePair(None, ''), LineagePair('class', 'c') ] - assert zip_lineage(x) == ['a', '', 'c', '', '', '', '', ''] + x = [ + LineagePair("superkingdom", "a"), + LineagePair(None, ""), + LineagePair("class", "c"), + ] + assert zip_lineage(x) == ["a", "", "c", "", "", "", "", ""] def test_zip_lineage_3_truncate(): - x = [ LineagePair('superkingdom', 'a'), LineagePair(None, ''), LineagePair('class', 'c') ] - assert zip_lineage(x, truncate_empty=True) == ['a', '', 'c'] + x = [ + LineagePair("superkingdom", "a"), + LineagePair(None, ""), + LineagePair("class", "c"), + ] + assert zip_lineage(x, truncate_empty=True) == ["a", "", "c"] def test_zip_lineage_4(): - x = [ LineagePair('superkingdom', 'a'), LineagePair('class', 'c') ] + x = [LineagePair("superkingdom", "a"), LineagePair("class", "c")] with pytest.raises(ValueError) as e: zip_lineage(x) - assert 'incomplete lineage at phylum - is class instead' in str(e.value) + assert "incomplete lineage at phylum - is class instead" in str(e.value) def test_display_lineage_1(): - x = [ LineagePair('superkingdom', 'a'), LineagePair('phylum', 'b') ] + x = [LineagePair("superkingdom", "a"), LineagePair("phylum", "b")] assert display_lineage(x) == "a;b", display_lineage(x) def test_display_lineage_2(): - x = [ LineagePair('superkingdom', 'a'), LineagePair(None, ''), LineagePair('class', 'c') ] + x = [ + LineagePair("superkingdom", "a"), + LineagePair(None, ""), + LineagePair("class", "c"), + ] assert display_lineage(x) == "a;;c", 
display_lineage(x) def test_build_tree(): - tree = build_tree([[LineagePair('rank1', 'name1'), - LineagePair('rank2', 'name2')]]) - assert tree == { LineagePair('rank1', 'name1'): - { LineagePair('rank2', 'name2') : {}} } + tree = build_tree([[LineagePair("rank1", "name1"), LineagePair("rank2", "name2")]]) + assert tree == {LineagePair("rank1", "name1"): {LineagePair("rank2", "name2"): {}}} def test_build_tree_2(): - tree = build_tree([[LineagePair('rank1', 'name1'), LineagePair('rank2', 'name2a')], - [LineagePair('rank1', 'name1'), LineagePair('rank2', 'name2b')], - ]) + tree = build_tree( + [ + [LineagePair("rank1", "name1"), LineagePair("rank2", "name2a")], + [LineagePair("rank1", "name1"), LineagePair("rank2", "name2b")], + ] + ) - assert tree == { LineagePair('rank1', 'name1'): { LineagePair('rank2', 'name2a') : {}, - LineagePair('rank2', 'name2b') : {}} } + assert tree == { + LineagePair("rank1", "name1"): { + LineagePair("rank2", "name2a"): {}, + LineagePair("rank2", "name2b"): {}, + } + } -def test_build_tree_3(): # empty 'rank2' name - tree = build_tree([[LineagePair('rank1', 'name1'), LineagePair('rank2', '')]]) - assert tree == { LineagePair('rank1', 'name1'): {} } +def test_build_tree_3(): # empty 'rank2' name + tree = build_tree([[LineagePair("rank1", "name1"), LineagePair("rank2", "")]]) + assert tree == {LineagePair("rank1", "name1"): {}} def test_build_tree_4(): - tree = build_tree([[LineagePair('rank1', 'name1'), LineagePair('rank2', 'name2a')], - ]) - - tree = build_tree([[LineagePair('rank1', 'name1'), LineagePair('rank2', 'name2b')], - ], tree) + tree = build_tree( + [ + [LineagePair("rank1", "name1"), LineagePair("rank2", "name2a")], + ] + ) + + tree = build_tree( + [ + [LineagePair("rank1", "name1"), LineagePair("rank2", "name2b")], + ], + tree, + ) + + assert tree == { + LineagePair("rank1", "name1"): { + LineagePair("rank2", "name2a"): {}, + LineagePair("rank2", "name2b"): {}, + } + } - assert tree == { LineagePair('rank1', 'name1'): { LineagePair('rank2', 'name2a') : {}, - LineagePair('rank2', 'name2b') : {}} } def test_build_tree_5(): with pytest.raises(ValueError): - tree = build_tree([]) + build_tree([]) def test_find_lca(): - tree = build_tree([[LineagePair('rank1', 'name1'), LineagePair('rank2', 'name2')]]) + tree = build_tree([[LineagePair("rank1", "name1"), LineagePair("rank2", "name2")]]) lca = find_lca(tree) - assert lca == ((LineagePair('rank1', 'name1'), LineagePair('rank2', 'name2'),), 0) + assert lca == ( + ( + LineagePair("rank1", "name1"), + LineagePair("rank2", "name2"), + ), + 0, + ) def test_find_lca_2(): - tree = build_tree([[LineagePair('rank1', 'name1'), LineagePair('rank2', 'name2a')], - [LineagePair('rank1', 'name1'), LineagePair('rank2', 'name2b')], - ]) + tree = build_tree( + [ + [LineagePair("rank1", "name1"), LineagePair("rank2", "name2a")], + [LineagePair("rank1", "name1"), LineagePair("rank2", "name2b")], + ] + ) lca = find_lca(tree) - assert lca == ((LineagePair('rank1', 'name1'),), 2) + assert lca == ((LineagePair("rank1", "name1"),), 2) def test_find_lca_3(): - lin1 = lca_utils.make_lineage('a;b;c') - lin2 = lca_utils.make_lineage('a;b') + lin1 = lca_utils.make_lineage("a;b;c") + lin2 = lca_utils.make_lineage("a;b") tree = build_tree([lin1, lin2]) lca, reason = find_lca(tree) - assert lca == lin1 # find most specific leaf node + assert lca == lin1 # find most specific leaf node def test_gather_assignments_1(): # test basic mechanics of gather_assignments function hashval = 12345678 - lin = lca_utils.make_lineage('a;b;c') + lin = 
lca_utils.make_lineage("a;b;c") db = FakeLCA_Database() - db._set_lineage_assignment(hashval, set([ lin ])) + db._set_lineage_assignment(hashval, set([lin])) assignments = lca_utils.gather_assignments([hashval], [db]) print(assignments) - assert assignments[hashval] == set([ lin ]) + assert assignments[hashval] == set([lin]) def test_gather_assignments_2(): # test basic mechanics of gather_assignments function with two lineages hashval = 12345678 - lin = lca_utils.make_lineage('a;b;c') - lin2 = lca_utils.make_lineage('a;b;d') + lin = lca_utils.make_lineage("a;b;c") + lin2 = lca_utils.make_lineage("a;b;d") db = FakeLCA_Database() - db._set_lineage_assignment(hashval, set([ lin, lin2 ])) + db._set_lineage_assignment(hashval, set([lin, lin2])) assignments = lca_utils.gather_assignments([hashval], [db]) print(assignments) - assert assignments[hashval] == set([ lin, lin2 ]) + assert assignments[hashval] == set([lin, lin2]) def test_gather_assignments_3(): @@ -166,27 +228,27 @@ def test_gather_assignments_3(): # and two hashvals hashval = 12345678 hashval2 = 87654321 - lin = lca_utils.make_lineage('a;b;c') - lin2 = lca_utils.make_lineage('a;b;d') + lin = lca_utils.make_lineage("a;b;c") + lin2 = lca_utils.make_lineage("a;b;d") db = FakeLCA_Database() - db._set_lineage_assignment(hashval, set([ lin, lin2 ])) - db._set_lineage_assignment(hashval2, set([ lin ])) + db._set_lineage_assignment(hashval, set([lin, lin2])) + db._set_lineage_assignment(hashval2, set([lin])) assignments = lca_utils.gather_assignments([hashval, hashval2], [db]) print(assignments) - assert assignments[hashval] == set([ lin, lin2 ]) - assert assignments[hashval2] == set([ lin ]) + assert assignments[hashval] == set([lin, lin2]) + assert assignments[hashval2] == set([lin]) def test_count_lca_for_assignments_1(): # test basic mechanics of gather_assignments function hashval = 12345678 - lin = lca_utils.make_lineage('a;b;c') + lin = lca_utils.make_lineage("a;b;c") db = FakeLCA_Database() - db._set_lineage_assignment(hashval, set([ lin ])) + db._set_lineage_assignment(hashval, set([lin])) assignments = lca_utils.gather_assignments([hashval], [db]) counts = count_lca_for_assignments(assignments) @@ -199,11 +261,11 @@ def test_count_lca_for_assignments_1(): def test_count_lca_for_assignments_2(): # test basic mechanics of gather_assignments function with two lineages hashval = 12345678 - lin = lca_utils.make_lineage('a;b;c') - lin2 = lca_utils.make_lineage('a;b;d') + lin = lca_utils.make_lineage("a;b;c") + lin2 = lca_utils.make_lineage("a;b;d") db = FakeLCA_Database() - db._set_lineage_assignment(hashval, set([ lin, lin2 ])) + db._set_lineage_assignment(hashval, set([lin, lin2])) assignments = lca_utils.gather_assignments([hashval], [db]) counts = count_lca_for_assignments(assignments) @@ -213,7 +275,7 @@ def test_count_lca_for_assignments_2(): assert counts[lin2] == 0 assert len(counts) == 1 - lca_lin = lca_utils.make_lineage('a;b') + lca_lin = lca_utils.make_lineage("a;b") assert counts[lca_lin] == 1 @@ -222,12 +284,12 @@ def test_count_lca_for_assignments_3(): # and two hashvals hashval = 12345678 hashval2 = 87654321 - lin = lca_utils.make_lineage('a;b;c') - lin2 = lca_utils.make_lineage('a;b;d') + lin = lca_utils.make_lineage("a;b;c") + lin2 = lca_utils.make_lineage("a;b;d") db = FakeLCA_Database() - db._set_lineage_assignment(hashval, set([ lin, lin2 ])) - db._set_lineage_assignment(hashval2, set([ lin ])) + db._set_lineage_assignment(hashval, set([lin, lin2])) + db._set_lineage_assignment(hashval2, set([lin])) assignments = 
lca_utils.gather_assignments([hashval, hashval2], [db]) counts = count_lca_for_assignments(assignments) @@ -237,20 +299,20 @@ def test_count_lca_for_assignments_3(): assert counts[lin] == 1 assert counts[lin2] == 0 - lca_lin = lca_utils.make_lineage('a;b') + lca_lin = lca_utils.make_lineage("a;b") assert counts[lca_lin] == 1 def test_count_lca_for_assignments_abund_1(): # test basic mechanics of gather_assignments function hashval = 12345678 - hashval_counts = dict() + hashval_counts = {} hashval_counts[hashval] = 3 - lin = lca_utils.make_lineage('a;b;c') + lin = lca_utils.make_lineage("a;b;c") db = FakeLCA_Database() - db._set_lineage_assignment(hashval, set([ lin ])) + db._set_lineage_assignment(hashval, set([lin])) assignments = lca_utils.gather_assignments(hashval_counts.keys(), [db]) counts = count_lca_for_assignments(assignments, hashval_counts) @@ -263,14 +325,14 @@ def test_count_lca_for_assignments_abund_1(): def test_count_lca_for_assignments_abund_2(): # test basic mechanics of gather_assignments function with two lineages hashval = 12345678 - hashval_counts = dict() + hashval_counts = {} hashval_counts[hashval] = 3 - lin = lca_utils.make_lineage('a;b;c') - lin2 = lca_utils.make_lineage('a;b;d') + lin = lca_utils.make_lineage("a;b;c") + lin2 = lca_utils.make_lineage("a;b;d") db = FakeLCA_Database() - db._set_lineage_assignment(hashval, set([ lin, lin2 ])) + db._set_lineage_assignment(hashval, set([lin, lin2])) assignments = lca_utils.gather_assignments(hashval_counts, [db]) counts = count_lca_for_assignments(assignments, hashval_counts) @@ -280,8 +342,8 @@ def test_count_lca_for_assignments_abund_2(): assert counts[lin2] == 0 assert len(counts) == 1 - lca_lin = lca_utils.make_lineage('a;b') - assert counts[lca_lin] == 3 # yes! + lca_lin = lca_utils.make_lineage("a;b") + assert counts[lca_lin] == 3 # yes! def test_count_lca_for_assignments_abund_3(): @@ -289,27 +351,28 @@ def test_count_lca_for_assignments_abund_3(): # and two hashvals hashval = 12345678 hashval2 = 87654321 - hashval_counts = dict() + hashval_counts = {} hashval_counts[hashval] = 2 hashval_counts[hashval2] = 5 - lin = lca_utils.make_lineage('a;b;c') - lin2 = lca_utils.make_lineage('a;b;d') + lin = lca_utils.make_lineage("a;b;c") + lin2 = lca_utils.make_lineage("a;b;d") db = FakeLCA_Database() - db._set_lineage_assignment(hashval, set([ lin, lin2 ])) - db._set_lineage_assignment(hashval2, set([ lin ])) + db._set_lineage_assignment(hashval, set([lin, lin2])) + db._set_lineage_assignment(hashval2, set([lin])) assignments = lca_utils.gather_assignments(hashval_counts, [db]) counts = count_lca_for_assignments(assignments, hashval_counts) print(counts) assert len(counts) == 2 - assert counts[lin] == 5 # makes sense - assert counts[lin2] == 0 # makes sense + assert counts[lin] == 5 # makes sense + assert counts[lin2] == 0 # makes sense + + lca_lin = lca_utils.make_lineage("a;b") + assert counts[lca_lin] == 2 # yes! - lca_lin = lca_utils.make_lineage('a;b') - assert counts[lca_lin] == 2 # yes! 
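Taken together, these abundance-weighted tests pin down a simple rule: for each hashval, a single assigned lineage is counted as-is, multiple assignments collapse to their LCA, and each hashval contributes its abundance from `hashval_counts` rather than 1. A plausible pure-Python reading of that rule, inferred from the assertions (the real `count_lca_for_assignments` implementation may differ; `build_tree` and `find_lca` are the helpers imported at the top of this module):

```python
from collections import Counter

def count_lca_sketch(assignments, hashval_counts):
    # assignments: {hashval: set of lineages}; weight each hashval by abundance
    counts = Counter()
    for hashval, lineages in assignments.items():
        tree = build_tree(lineages)
        lca, _reason = find_lca(tree)  # single lineage -> its leaf; several -> their LCA
        counts[lca] += hashval_counts[hashval]
    return counts
```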
def test_count_lca_for_assignments_abund_4(): # test basic mechanics of gather_assignments function with three lineages @@ -317,112 +380,113 @@ def test_count_lca_for_assignments_abund_4(): hashval = 12345678 hashval2 = 87654321 hashval3 = 34567891 - hashval_counts = dict() + hashval_counts = {} hashval_counts[hashval] = 2 hashval_counts[hashval2] = 5 hashval_counts[hashval3] = 3 - lin = lca_utils.make_lineage('a;b;c') - lin2 = lca_utils.make_lineage('a;b;d') - lin3 = lca_utils.make_lineage('a;b;d;e') + lin = lca_utils.make_lineage("a;b;c") + lin2 = lca_utils.make_lineage("a;b;d") + lin3 = lca_utils.make_lineage("a;b;d;e") db = FakeLCA_Database() - db._set_lineage_assignment(hashval, set([ lin, lin2 ])) # lca: a;b - db._set_lineage_assignment(hashval2, set([ lin ])) # lca: a;b;c - db._set_lineage_assignment(hashval3, set([ lin2, lin3 ])) # a;b;d;e + db._set_lineage_assignment(hashval, set([lin, lin2])) # lca: a;b + db._set_lineage_assignment(hashval2, set([lin])) # lca: a;b;c + db._set_lineage_assignment(hashval3, set([lin2, lin3])) # a;b;d;e assignments = lca_utils.gather_assignments(hashval_counts, [db]) counts = count_lca_for_assignments(assignments, hashval_counts) print(counts) assert len(counts) == 3 - assert counts[lin] == 5 # makes sense b/c hashval2 - assert counts[lin2] == 0 # a;b;d (lin2) + a;b;d;e (lin3) -->a;b;d;e (lin3) only - assert counts[lin3] == 3 # hashval3 + assert counts[lin] == 5 # makes sense b/c hashval2 + assert counts[lin2] == 0 # a;b;d (lin2) + a;b;d;e (lin3) -->a;b;d;e (lin3) only + assert counts[lin3] == 3 # hashval3 + + lca_lin = lca_utils.make_lineage("a;b") + assert counts[lca_lin] == 2 # yes, b/c hashval - lca_lin = lca_utils.make_lineage('a;b') - assert counts[lca_lin] == 2 # yes, b/c hashval def test_count_lca_for_assignments_abund_5(): # test basic mechanics of gather_assignments function with two lineages # and two hashvals when linages match but one has lower taxo detail hashval = 12345678 hashval2 = 87654321 - hashval_counts = dict() + hashval_counts = {} hashval_counts[hashval] = 2 hashval_counts[hashval2] = 5 - lin = lca_utils.make_lineage('a;b;d') - lin2 = lca_utils.make_lineage('a;b;d;e') + lin = lca_utils.make_lineage("a;b;d") + lin2 = lca_utils.make_lineage("a;b;d;e") db = FakeLCA_Database() - db._set_lineage_assignment(hashval, set([ lin, lin2 ])) - db._set_lineage_assignment(hashval2, set([ lin ])) + db._set_lineage_assignment(hashval, set([lin, lin2])) + db._set_lineage_assignment(hashval2, set([lin])) assignments = lca_utils.gather_assignments(hashval_counts, [db]) counts = count_lca_for_assignments(assignments, hashval_counts) print(counts) assert len(counts) == 2 - assert counts[lin] == 5 # makes sense - assert counts[lin2] == 2 # lin+lin2 yield just lin2 + assert counts[lin] == 5 # makes sense + assert counts[lin2] == 2 # lin+lin2 yield just lin2 def test_is_lineage_match_1(): # basic behavior: match at order and above, but not at family or below. 
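A quick gloss on the semantics pinned down by the `is_lineage_match` tests in this hunk: two lineages "match at rank r" when they agree at r and at every rank above it. A hypothetical pure-Python equivalent (illustrative only — not the sourmash implementation; it assumes `LineagePair` exposes a `.rank` field, as the namedtuple usage elsewhere in these tests suggests):

```python
def is_lineage_match_sketch(lin1, lin2, rank):
    # walk both lineages from superkingdom downward, pair by pair
    for pair1, pair2 in zip(lin1, lin2):
        if pair1 != pair2:
            return False       # disagreement at or above `rank`
        if pair1.rank == rank:
            return True        # agreed all the way down to `rank`
    return False  # one lineage (or both) ends before reaching `rank`
```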
- lin1 = make_lineage('d__a;p__b;c__c;o__d;f__e') - lin2 = make_lineage('d__a;p__b;c__c;o__d;f__f') + lin1 = make_lineage("d__a;p__b;c__c;o__d;f__e") + lin2 = make_lineage("d__a;p__b;c__c;o__d;f__f") - assert is_lineage_match(lin1, lin2, 'superkingdom') - assert is_lineage_match(lin1, lin2, 'phylum') - assert is_lineage_match(lin1, lin2, 'class') - assert is_lineage_match(lin1, lin2, 'order') - assert not is_lineage_match(lin1, lin2, 'family') - assert not is_lineage_match(lin1, lin2, 'genus') - assert not is_lineage_match(lin1, lin2, 'species') + assert is_lineage_match(lin1, lin2, "superkingdom") + assert is_lineage_match(lin1, lin2, "phylum") + assert is_lineage_match(lin1, lin2, "class") + assert is_lineage_match(lin1, lin2, "order") + assert not is_lineage_match(lin1, lin2, "family") + assert not is_lineage_match(lin1, lin2, "genus") + assert not is_lineage_match(lin1, lin2, "species") def test_is_lineage_match_2(): # match at family, and above, levels; no genus or species to match - lin1 = make_lineage('d__a;p__b;c__c;o__d;f__f') - lin2 = make_lineage('d__a;p__b;c__c;o__d;f__f') + lin1 = make_lineage("d__a;p__b;c__c;o__d;f__f") + lin2 = make_lineage("d__a;p__b;c__c;o__d;f__f") - assert is_lineage_match(lin1, lin2, 'superkingdom') - assert is_lineage_match(lin1, lin2, 'phylum') - assert is_lineage_match(lin1, lin2, 'class') - assert is_lineage_match(lin1, lin2, 'order') - assert is_lineage_match(lin1, lin2, 'family') - assert not is_lineage_match(lin1, lin2, 'genus') - assert not is_lineage_match(lin1, lin2, 'species') + assert is_lineage_match(lin1, lin2, "superkingdom") + assert is_lineage_match(lin1, lin2, "phylum") + assert is_lineage_match(lin1, lin2, "class") + assert is_lineage_match(lin1, lin2, "order") + assert is_lineage_match(lin1, lin2, "family") + assert not is_lineage_match(lin1, lin2, "genus") + assert not is_lineage_match(lin1, lin2, "species") def test_is_lineage_match_3(): # one lineage is empty - lin1 = make_lineage('') - lin2 = make_lineage('d__a;p__b;c__c;o__d;f__f') + lin1 = make_lineage("") + lin2 = make_lineage("d__a;p__b;c__c;o__d;f__f") - assert not is_lineage_match(lin1, lin2, 'superkingdom') - assert not is_lineage_match(lin1, lin2, 'family') - assert not is_lineage_match(lin1, lin2, 'order') - assert not is_lineage_match(lin1, lin2, 'class') - assert not is_lineage_match(lin1, lin2, 'phylum') - assert not is_lineage_match(lin1, lin2, 'genus') - assert not is_lineage_match(lin1, lin2, 'species') + assert not is_lineage_match(lin1, lin2, "superkingdom") + assert not is_lineage_match(lin1, lin2, "family") + assert not is_lineage_match(lin1, lin2, "order") + assert not is_lineage_match(lin1, lin2, "class") + assert not is_lineage_match(lin1, lin2, "phylum") + assert not is_lineage_match(lin1, lin2, "genus") + assert not is_lineage_match(lin1, lin2, "species") def test_pop_to_rank_1(): # basic behavior - pop to order? - lin1 = make_lineage('d__a;p__b;c__c;o__d') - lin2 = make_lineage('d__a;p__b;c__c;o__d;f__f') + lin1 = make_lineage("d__a;p__b;c__c;o__d") + lin2 = make_lineage("d__a;p__b;c__c;o__d;f__f") print(lin1) - print(pop_to_rank(lin2, 'order')) - assert pop_to_rank(lin2, 'order') == lin1 + print(pop_to_rank(lin2, "order")) + assert pop_to_rank(lin2, "order") == lin1 def test_pop_to_rank_2(): # what if we're already above rank? 
- lin2 = make_lineage('d__a;p__b;c__c;o__d;f__f') + lin2 = make_lineage("d__a;p__b;c__c;o__d;f__f") - print(pop_to_rank(lin2, 'species')) - assert pop_to_rank(lin2, 'species') == lin2 + print(pop_to_rank(lin2, "species")) + assert pop_to_rank(lin2, "species") == lin2 diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 074d72d705..138ae0f829 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -12,13 +12,13 @@ def test_generate_manifest(): # test basic manifest-generating functionality. - protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") loader = sourmash.load_file_as_index(protzip) rows = [] siglist = [] - for (sig, loc) in loader._signatures_with_internal(): + for sig, loc in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) rows.append(row) siglist.append(sig) @@ -28,9 +28,9 @@ def test_generate_manifest(): assert len(manifest) == len(rows) assert len(manifest) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list for sig in siglist: assert sig in manifest @@ -38,13 +38,13 @@ def test_generate_manifest(): def test_manifest_operations(): # test basic manifest operations - += - protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") loader = sourmash.load_file_as_index(protzip) rows = [] siglist = [] - for (sig, loc) in loader._signatures_with_internal(): + for sig, loc in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) rows.append(row) siglist.append(sig) @@ -53,24 +53,24 @@ def test_manifest_operations(): manifest2 = index.CollectionManifest(rows) manifest += manifest2 - assert len(manifest) == 2*len(rows) + assert len(manifest) == 2 * len(rows) assert len(manifest) == 4 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list def test_manifest_operations_fail(): # should not be able to add a manifest to itself - not only makes # no sense, but it means you're modifying a generator in place, sometimes. 
- protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") loader = sourmash.load_file_as_index(protzip) rows = [] siglist = [] - for (sig, loc) in loader._signatures_with_internal(): + for sig, loc in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) rows.append(row) siglist.append(sig) @@ -82,13 +82,13 @@ def test_manifest_operations_fail(): def test_manifest_to_picklist(): # test manifest/picklist interaction basics - protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") loader = sourmash.load_file_as_index(protzip) rows = [] siglist = [] - for (sig, loc) in loader._signatures_with_internal(): + for sig, loc in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) rows.append(row) siglist.append(sig) @@ -103,7 +103,7 @@ def test_manifest_to_picklist(): def test_manifest_compare(): # test saving and loading manifests - protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") loader = sourmash.load_file_as_index(protzip) manifest = loader.manifest @@ -124,7 +124,7 @@ def test_manifest_compare(): # not equal / diff values rows = list(manifest.rows) rows[0] = dict(rows[0]) - rows[0]['internal_location'] += '.foo' + rows[0]["internal_location"] += ".foo" short_mf = index.CollectionManifest(rows) assert short_mf != manifest @@ -132,13 +132,13 @@ def test_manifest_compare(): def test_save_load_manifest(): # test saving and loading manifests - protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") loader = sourmash.load_file_as_index(protzip) rows = [] siglist = [] - for (sig, loc) in loader._signatures_with_internal(): + for sig, loc in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) rows.append(row) siglist.append(sig) @@ -179,7 +179,7 @@ def test_save_load_manifest(): # not equal / diff values rows = list(manifest.rows) rows[0] = dict(rows[0]) - rows[0]['internal_location'] += '.foo' + rows[0]["internal_location"] += ".foo" short_mf = index.CollectionManifest(rows) assert short_mf != manifest @@ -189,8 +189,7 @@ def test_manifest_to_picklist_bug(runtmp): # this tests a fun combination of things that led to a bug. # tl;dr we only want to iterate once across a generator... # ref #2762 - c = runtmp - all_zip = utils.get_test_data('prot/all.zip') + all_zip = utils.get_test_data("prot/all.zip") idx = sourmash_args.load_file_as_index(all_zip) assert len(idx) == 8 @@ -201,7 +200,7 @@ def test_manifest_to_picklist_bug(runtmp): def filter_fn(row): # match? 
keep = False - if "09a0869" in row['md5']: + if "09a0869" in row["md5"]: keep = True return keep @@ -219,17 +218,17 @@ def filter_fn(row): def test_generate_manifest_iterate_once(): # we should only iterate across manifest rows once - protzip = utils.get_test_data('prot/protein.zip') + protzip = utils.get_test_data("prot/protein.zip") loader = sourmash.load_file_as_index(protzip) siglist = [] - for (sig, loc) in loader._signatures_with_internal(): + for sig, loc in loader._signatures_with_internal(): siglist.append(sig) # build generator function => will not allow iteration twice def genfn(): - for (sig, loc) in loader._signatures_with_internal(): + for sig, loc in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) yield row @@ -238,9 +237,9 @@ def genfn(): assert len(manifest) == 2 assert len(manifest._md5_set) == 2 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list - assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "16869d2c8a1d29d1c8e56f5c561e585e" in md5_list + assert "120d311cc785cc9d0df9dc0646b2b857" in md5_list for sig in siglist: assert sig in manifest diff --git a/tests/test_manifest_protocol.py b/tests/test_manifest_protocol.py index 5b9ea003d5..d36e8a309c 100644 --- a/tests/test_manifest_protocol.py +++ b/tests/test_manifest_protocol.py @@ -13,7 +13,7 @@ def build_simple_manifest(runtmp): # load and return the manifest from prot/all.zip - filename = utils.get_test_data('prot/all.zip') + filename = utils.get_test_data("prot/all.zip") idx = sourmash.load_file_as_index(filename) mf = idx.manifest assert len(mf) == 8 @@ -22,29 +22,29 @@ def build_simple_manifest(runtmp): def build_sqlite_manifest(runtmp): # return the manifest from prot/all.zip - filename = utils.get_test_data('prot/all.zip') + filename = utils.get_test_data("prot/all.zip") idx = sourmash.load_file_as_index(filename) mf = idx.manifest # build sqlite manifest from this 'un - mfdb = runtmp.output('test.sqlmf') + mfdb = runtmp.output("test.sqlmf") return SqliteCollectionManifest.load_from_manifest(mf, dbfile=mfdb) - + def save_load_manifest(runtmp): # save/load the manifest from a CSV. 
mf = build_simple_manifest(runtmp) - mf_csv = runtmp.output('mf.csv') + mf_csv = runtmp.output("mf.csv") mf.write_to_filename(mf_csv) load_mf = CollectionManifest.load_from_filename(mf_csv) return load_mf - -@pytest.fixture(params=[build_simple_manifest, - save_load_manifest, - build_sqlite_manifest]) + +@pytest.fixture( + params=[build_simple_manifest, save_load_manifest, build_sqlite_manifest] +) def manifest_obj(request, runtmp): build_fn = request.param @@ -55,6 +55,7 @@ def manifest_obj(request, runtmp): ### generic CollectionManifest tests go here ### + def test_manifest_len(manifest_obj): # check that 'len' works assert len(manifest_obj) == 8 @@ -78,39 +79,38 @@ def test_manifest_bool(manifest_obj): def test_make_manifest_row(manifest_obj): # build a manifest row from a signature - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss = sourmash.load_one_signature(sig47) - row = manifest_obj.make_manifest_row(ss, 'foo', include_signature=False) - assert not 'signature' in row - assert row['internal_location'] == 'foo' - - assert row['md5'] == ss.md5sum() - assert row['md5short'] == ss.md5sum()[:8] - assert row['ksize'] == 31 - assert row['moltype'] == 'DNA' - assert row['num'] == 0 - assert row['scaled'] == 1000 - assert row['n_hashes'] == len(ss.minhash) - assert not row['with_abundance'] - assert row['name'] == ss.name - assert row['filename'] == ss.filename - - + row = manifest_obj.make_manifest_row(ss, "foo", include_signature=False) + assert "signature" not in row + assert row["internal_location"] == "foo" + + assert row["md5"] == ss.md5sum() + assert row["md5short"] == ss.md5sum()[:8] + assert row["ksize"] == 31 + assert row["moltype"] == "DNA" + assert row["num"] == 0 + assert row["scaled"] == 1000 + assert row["n_hashes"] == len(ss.minhash) + assert not row["with_abundance"] + assert row["name"] == ss.name + assert row["filename"] == ss.filename + + def test_manifest_create_manifest(manifest_obj): # test the 'create_manifest' method - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss = sourmash.load_one_signature(sig47) def yield_sigs(): - yield ss, 'fiz' + yield ss, "fiz" - new_mf = manifest_obj.create_manifest(yield_sigs(), - include_signature=False) + new_mf = manifest_obj.create_manifest(yield_sigs(), include_signature=False) assert len(new_mf) == 1 new_row = list(new_mf.rows)[0] - - row = manifest_obj.make_manifest_row(ss, 'fiz', include_signature=False) + + row = manifest_obj.make_manifest_row(ss, "fiz", include_signature=False) required_keys = BaseCollectionManifest.required_keys for k in required_keys: @@ -119,32 +119,37 @@ def yield_sigs(): def test_manifest_select_to_manifest(manifest_obj): # do some light testing of 'select_to_manifest' - new_mf = manifest_obj.select_to_manifest(moltype='DNA') + new_mf = manifest_obj.select_to_manifest(moltype="DNA") assert len(new_mf) == 2 def test_manifest_locations(manifest_obj): # check the 'locations' method - locs = set(['dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig', - 'dayhoff/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig', - 'hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig', - 'hp/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig', - 'protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig', - 'protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig', - 'dna-sig.noext', - 'dna-sig.sig.gz'] - ) + locs = set( + [ + "dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig", + "dayhoff/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig", + 
"hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig", + "hp/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig", + "protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig", + "protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig", + "dna-sig.noext", + "dna-sig.sig.gz", + ] + ) assert set(manifest_obj.locations()) == locs def test_manifest___contains__(manifest_obj): # check the 'in' operator - sigfile = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') + sigfile = utils.get_test_data( + "prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) ss = sourmash.load_one_signature(sigfile) assert ss in manifest_obj - sigfile2 = utils.get_test_data('2.fa.sig') + sigfile2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sigfile2, ksize=31) assert ss2 not in manifest_obj @@ -159,36 +164,37 @@ def test_manifest_to_picklist(manifest_obj): def test_manifest_filter_rows(manifest_obj): # test filter_rows - filter_fn = lambda x: 'OS223' in x['name'] + def filter_fn(x): + return "OS223" in x["name"] mf = manifest_obj.filter_rows(filter_fn) assert len(mf) == 1 row = list(mf.rows)[0] - assert row['name'] == 'NC_011663.1 Shewanella baltica OS223, complete genome' + assert row["name"] == "NC_011663.1 Shewanella baltica OS223, complete genome" def test_manifest_filter_cols(manifest_obj): # test filter_rows - col_filter_fn = lambda x: 'OS223' in x[0] + def col_filter_fn(x): + return "OS223" in x[0] - mf = manifest_obj.filter_on_columns(col_filter_fn, ['name']) + mf = manifest_obj.filter_on_columns(col_filter_fn, ["name"]) assert len(mf) == 1 row = list(mf.rows)[0] - assert row['name'] == 'NC_011663.1 Shewanella baltica OS223, complete genome' + assert row["name"] == "NC_011663.1 Shewanella baltica OS223, complete genome" def test_manifest_iadd(manifest_obj): # test the 'create_manifest' method - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss = sourmash.load_one_signature(sig47) def yield_sigs(): - yield ss, 'fiz' + yield ss, "fiz" - new_mf = manifest_obj.create_manifest(yield_sigs(), - include_signature=False) + new_mf = manifest_obj.create_manifest(yield_sigs(), include_signature=False) assert len(new_mf) == 1 new_mf += manifest_obj @@ -197,14 +203,13 @@ def yield_sigs(): def test_manifest_add(manifest_obj): # test the 'create_manifest' method - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss = sourmash.load_one_signature(sig47) def yield_sigs(): - yield ss, 'fiz' + yield ss, "fiz" - new_mf = manifest_obj.create_manifest(yield_sigs(), - include_signature=False) + new_mf = manifest_obj.create_manifest(yield_sigs(), include_signature=False) assert len(new_mf) == 1 new_mf2 = new_mf + manifest_obj diff --git a/tests/test_minhash.py b/tests/test_minhash.py index 05802c0bff..474f1e231a 100644 --- a/tests/test_minhash.py +++ b/tests/test_minhash.py @@ -49,7 +49,7 @@ hash_murmur, _get_scaled_for_max_hash, _get_max_hash_for_scaled, - translate_codon + translate_codon, ) from sourmash import signature @@ -79,18 +79,18 @@ def _kmers_from_all_coding_frames(sequence, ksize): for frame in (0, 1, 2): # get forward k-mers for start in range(0, len(sequence) - ksize + 1 - frame, 3): - kmer = sequence[start + frame:start + frame + ksize] + kmer = sequence[start + frame : start + frame + ksize] yield kmer # get rc k-mers for start in range(0, len(seqrc) - ksize + 1 - frame, 3): - kmer = seqrc[start + frame:start + frame + ksize] + kmer = seqrc[start + frame : start + frame + ksize] yield kmer 
def _hash_fwd_only(mh_translate, seq): "Return the first hashval only, for coding frame +1." - assert len(seq) == mh_translate.ksize*3 + assert len(seq) == mh_translate.ksize * 3 xx = mh_translate.seq_to_hashes(seq)[0] return xx @@ -98,12 +98,12 @@ def _hash_fwd_only(mh_translate, seq): def test_basic_dna(track_abundance): # verify that MHs of size 1 stay size 1, & act properly as bottom sketches. mh = MinHash(1, 4, track_abundance=track_abundance) - assert mh.moltype == 'DNA' + assert mh.moltype == "DNA" - mh.add_sequence('ATGC') + mh.add_sequence("ATGC") a = mh.hashes - mh.add_sequence('GCAT') # this will not get added; hash > ATGC + mh.add_sequence("GCAT") # this will not get added; hash > ATGC b = mh.hashes print(a, b) @@ -117,7 +117,7 @@ def test_div_zero(track_abundance): mh = MinHash(1, 4, track_abundance=track_abundance) mh2 = mh.copy_and_clear() - mh.add_sequence('ATGC') + mh.add_sequence("ATGC") assert mh.similarity(mh2) == 0 assert mh2.similarity(mh) == 0 @@ -127,7 +127,7 @@ def test_div_zero_contained(track_abundance): mh = MinHash(0, 4, scaled=1, track_abundance=track_abundance) mh2 = mh.copy_and_clear() - mh.add_sequence('ATGC') + mh.add_sequence("ATGC") assert mh.contained_by(mh2) == 0 assert mh2.contained_by(mh) == 0 @@ -137,8 +137,8 @@ def test_contained_requires_scaled(track_abundance): mh1 = MinHash(1, 4, track_abundance=track_abundance) mh2 = MinHash(0, 4, scaled=1, track_abundance=track_abundance) - mh1.add_sequence('ATGC') - mh2.add_sequence('ATGC') + mh1.add_sequence("ATGC") + mh2.add_sequence("ATGC") with pytest.raises(TypeError): mh2.contained_by(mh1) @@ -152,8 +152,8 @@ def test_contained_requires_scaled_2(track_abundance): mh1 = MinHash(1, 4, track_abundance=track_abundance) mh2 = MinHash(0, 4, scaled=1, track_abundance=track_abundance) - mh1.add_sequence('ATGC') - mh2.add_sequence('ATGC') + mh1.add_sequence("ATGC") + mh2.add_sequence("ATGC") with pytest.raises(TypeError): mh2.max_containment(mh1) @@ -167,8 +167,8 @@ def test_contained_requires_scaled_3(track_abundance): mh1 = MinHash(1, 4, track_abundance=track_abundance) mh2 = MinHash(0, 4, scaled=1, track_abundance=track_abundance) - mh1.add_sequence('ATGC') - mh2.add_sequence('ATGC') + mh1.add_sequence("ATGC") + mh2.add_sequence("ATGC") with pytest.raises(TypeError): mh2.avg_containment(mh1) @@ -179,36 +179,39 @@ def test_contained_requires_scaled_3(track_abundance): def test_bytes_dna(track_abundance): mh = MinHash(1, 4, track_abundance=track_abundance) - mh.add_sequence('ATGC') - mh.add_sequence(b'ATGC') - mh.add_sequence('ATGC') + mh.add_sequence("ATGC") + mh.add_sequence(b"ATGC") + mh.add_sequence("ATGC") a = mh.hashes - mh.add_sequence('GCAT') # this will not get added; hash > ATGC - mh.add_sequence(b'GCAT') # this will not get added; hash > ATGC - mh.add_sequence('GCAT') # this will not get added; hash > ATGC + mh.add_sequence("GCAT") # this will not get added; hash > ATGC + mh.add_sequence(b"GCAT") # this will not get added; hash > ATGC + mh.add_sequence("GCAT") # this will not get added; hash > ATGC b = mh.hashes print(a, b) assert list(a) == list(b) assert len(b) == 1 + def test_add_long_seqs_force(): # Test for (All kmers are invalid) - mh = sourmash.minhash.MinHash(n = 0, ksize=21, scaled =10, seed = 42) + mh = sourmash.minhash.MinHash(n=0, ksize=21, scaled=10, seed=42) seq = "ACGTN" * 100000 - hashes = mh.seq_to_hashes(seq, force = True) - assert(len(mh.hashes) == 0) + mh.seq_to_hashes(seq, force=True) + assert len(mh.hashes) == 0 def test_seq_to_hashes(track_abundance): - mh = 
sourmash.minhash.MinHash(n=0, ksize=21, scaled=1, track_abundance=track_abundance) + mh = sourmash.minhash.MinHash( + n=0, ksize=21, scaled=1, track_abundance=track_abundance + ) seq = "ATGAGAGACGATAGACAGATGACC" mh.add_sequence(seq) golden_hashes = mh.hashes - + # New seq to hashes without adding to the sketch new_hashes = mh.seq_to_hashes(seq) @@ -216,7 +219,14 @@ def test_seq_to_hashes(track_abundance): def test_seq_to_hashes_protein_1(track_abundance, dayhoff): - mh = MinHash(10, 2, is_protein=True, dayhoff=dayhoff, hp=False, track_abundance=track_abundance) + mh = MinHash( + 10, + 2, + is_protein=True, + dayhoff=dayhoff, + hp=False, + track_abundance=track_abundance, + ) prot_seq = "AGYYG" mh.add_protein(prot_seq) @@ -224,16 +234,19 @@ def test_seq_to_hashes_protein_1(track_abundance, dayhoff): golden_hashes = mh.hashes # New seq to hashes without adding to the sketch - new_hashes = mh.seq_to_hashes(prot_seq, is_protein = True) + new_hashes = mh.seq_to_hashes(prot_seq, is_protein=True) assert set(golden_hashes) == set(new_hashes) + def test_seq_to_hashes_protein_2(track_abundance): - mh = sourmash.minhash.MinHash(n=0, ksize=21, scaled=1, track_abundance=track_abundance) + mh = sourmash.minhash.MinHash( + n=0, ksize=21, scaled=1, track_abundance=track_abundance + ) seq = "ATGAGAGACGATAGACAGATGACC" with pytest.raises(ValueError): - mh.seq_to_hashes(seq, is_protein = True) + mh.seq_to_hashes(seq, is_protein=True) def test_seq_to_hashes_translated(track_abundance): @@ -252,7 +265,7 @@ def test_seq_to_hashes_translated(track_abundance): def test_seq_to_hashes_bad_kmers_as_zeroes_1(): mh = sourmash.minhash.MinHash(n=0, ksize=21, scaled=1) seq = "ATGAGAGACGATAGACAGATGACN" - + # New seq to hashes without adding to the sketch hashes = mh.seq_to_hashes(seq, force=True, bad_kmers_as_zeroes=True) @@ -262,54 +275,69 @@ def test_seq_to_hashes_bad_kmers_as_zeroes_1(): def test_seq_to_hashes_bad_kmers_as_zeroes_2(): mh = sourmash.minhash.MinHash(n=0, ksize=21, scaled=1) seq = "ATGAGAGACGATAGACAGATGACN" - + with pytest.raises(ValueError): - hashes = mh.seq_to_hashes(seq, bad_kmers_as_zeroes=True) + mh.seq_to_hashes(seq, bad_kmers_as_zeroes=True) def test_seq_to_hashes_translated_short(): - mh = MinHash(0, 2, is_protein=True, dayhoff=True, hp=False, scaled = 1) + mh = MinHash(0, 2, is_protein=True, dayhoff=True, hp=False, scaled=1) hashes = mh.seq_to_hashes("ACTGA") - assert(len(hashes) == 0) + assert len(hashes) == 0 def test_bytes_protein_dayhoff(track_abundance, dayhoff): # verify that we can hash protein/aa sequences - mh = MinHash(10, 2, is_protein=True, dayhoff=dayhoff, hp=False, - track_abundance=track_abundance) - - expected_moltype = 'protein' + mh = MinHash( + 10, + 2, + is_protein=True, + dayhoff=dayhoff, + hp=False, + track_abundance=track_abundance, + ) + + expected_moltype = "protein" if dayhoff: - expected_moltype = 'dayhoff' + expected_moltype = "dayhoff" assert mh.moltype == expected_moltype - mh.add_protein('AGYYG') - mh.add_protein('AGYYG') - mh.add_protein(b'AGYYG') + mh.add_protein("AGYYG") + mh.add_protein("AGYYG") + mh.add_protein(b"AGYYG") assert len(mh.hashes) == 4 def test_protein_dayhoff(track_abundance, dayhoff): # verify that we can hash protein/aa sequences - mh = MinHash(10, 2, is_protein=True, dayhoff=dayhoff, hp=False, track_abundance=track_abundance) - mh.add_protein('AGYYG') + mh = MinHash( + 10, + 2, + is_protein=True, + dayhoff=dayhoff, + hp=False, + track_abundance=track_abundance, + ) + mh.add_protein("AGYYG") assert len(mh.hashes) == 4 def 
test_bytes_protein_hp(track_abundance, hp): # verify that we can hash protein/aa sequences - mh = MinHash(10, 2, is_protein=True, dayhoff=False, hp=hp, track_abundance=track_abundance) - expected_moltype = 'protein' + mh = MinHash( + 10, 2, is_protein=True, dayhoff=False, hp=hp, track_abundance=track_abundance + ) + expected_moltype = "protein" if hp: - expected_moltype = 'hp' + expected_moltype = "hp" assert mh.moltype == expected_moltype - mh.add_protein('AGYYG') - mh.add_protein(u'AGYYG') - mh.add_protein(b'AGYYG') + mh.add_protein("AGYYG") + mh.add_protein("AGYYG") + mh.add_protein(b"AGYYG") if hp: assert len(mh.hashes) == 1 @@ -319,8 +347,10 @@ def test_bytes_protein_hp(track_abundance, hp): def test_protein_hp(track_abundance, hp): # verify that we can hash protein/aa sequences - mh = MinHash(10, 2, is_protein=True, dayhoff=False, hp=hp, track_abundance=track_abundance) - mh.add_protein('AGYYG') + mh = MinHash( + 10, 2, is_protein=True, dayhoff=False, hp=hp, track_abundance=track_abundance + ) + mh.add_protein("AGYYG") if hp: assert len(mh.hashes) == 1 @@ -330,8 +360,8 @@ def test_protein_hp(track_abundance, hp): def test_module_translate_codon(track_abundance): # Ensure that translation occurs properly - module level function tests - assert "S" == translate_codon('TCT') - assert "S" == translate_codon('TC') + assert "S" == translate_codon("TCT") + assert "S" == translate_codon("TC") assert "X" == translate_codon("T") with pytest.raises(ValueError): @@ -341,14 +371,15 @@ def test_module_translate_codon(track_abundance): def test_dayhoff(track_abundance): # verify that we can hash to dayhoff-encoded protein/aa sequences - mh_dayhoff = MinHash(10, 2, is_protein=True, - dayhoff=True, hp=False, track_abundance=track_abundance) - mh_dayhoff.add_sequence('ACTGAC') + mh_dayhoff = MinHash( + 10, 2, is_protein=True, dayhoff=True, hp=False, track_abundance=track_abundance + ) + mh_dayhoff.add_sequence("ACTGAC") assert len(mh_dayhoff.hashes) == 2 # verify that dayhoff-encoded hashes are different from protein/aa hashes mh_protein = MinHash(10, 2, is_protein=True, track_abundance=track_abundance) - mh_protein.add_sequence('ACTGAC') + mh_protein.add_sequence("ACTGAC") assert len(mh_protein.hashes) == 2 print(mh_protein.hashes) @@ -360,39 +391,40 @@ def test_dayhoff_2(track_abundance): mh = MinHash(0, 7, scaled=1, dayhoff=True, track_abundance=1) # first, check protein -> dayhoff hashes via minhash - mh.add_protein('CADHIFC') + mh.add_protein("CADHIFC") assert len(mh) == 1 hashval = list(mh.hashes)[0] - assert hashval == hash_murmur('abcdefa') + assert hashval == hash_murmur("abcdefa") # also check seq_to_hashes - hashes = list(mh.seq_to_hashes('CADHIFC', is_protein=True)) + hashes = list(mh.seq_to_hashes("CADHIFC", is_protein=True)) assert hashval == hashes[0] # do we handle stop codons properly? 
mh = mh.copy_and_clear() - mh.add_protein('CADHIF*') + mh.add_protein("CADHIF*") assert len(mh) == 1 hashval = list(mh.hashes)[0] - assert hashval == hash_murmur('abcdef*') + assert hashval == hash_murmur("abcdef*") # again, check seq_to_hashes - hashes = list(mh.seq_to_hashes('CADHIF*', is_protein=True)) + hashes = list(mh.seq_to_hashes("CADHIF*", is_protein=True)) assert hashval == hashes[0] def test_hp(track_abundance): # verify that we can hash to hp-encoded protein/aa sequences - mh_hp = MinHash(10, 2, is_protein=True, - dayhoff=False, hp=True, track_abundance=track_abundance) - assert mh_hp.moltype == 'hp' + mh_hp = MinHash( + 10, 2, is_protein=True, dayhoff=False, hp=True, track_abundance=track_abundance + ) + assert mh_hp.moltype == "hp" - mh_hp.add_sequence('ACTGAC') + mh_hp.add_sequence("ACTGAC") assert len(mh_hp.hashes) == 2 # verify that hp-encoded hashes are different from protein/aa hashes mh_protein = MinHash(10, 2, is_protein=True, track_abundance=track_abundance) - mh_protein.add_sequence('ACTGAC') + mh_protein.add_sequence("ACTGAC") assert len(mh_protein.hashes) == 2 assert mh_protein.hashes != mh_hp.hashes @@ -401,30 +433,30 @@ def test_hp(track_abundance): def test_hp_2(track_abundance): mh = MinHash(0, 3, scaled=1, hp=True, track_abundance=track_abundance) - mh.add_protein('ANA') + mh.add_protein("ANA") assert len(mh) == 1 hashval = list(mh.hashes)[0] - assert hashval == hash_murmur('hph') + assert hashval == hash_murmur("hph") # also check seq_to_hashes - hashes = list(mh.seq_to_hashes('ANA', is_protein=True)) + hashes = list(mh.seq_to_hashes("ANA", is_protein=True)) assert hashval == hashes[0] mh = mh.copy_and_clear() - mh.add_protein('AN*') + mh.add_protein("AN*") assert len(mh) == 1 hashval = list(mh.hashes)[0] - assert hashval == hash_murmur('hp*') + assert hashval == hash_murmur("hp*") # also check seq_to_hashes - hashes = list(mh.seq_to_hashes('AN*', is_protein=True)) + hashes = list(mh.seq_to_hashes("AN*", is_protein=True)) assert hashval == hashes[0] def test_protein_short(track_abundance): # verify that we can hash protein/aa sequences mh = MinHash(10, 9, is_protein=True, track_abundance=track_abundance) - mh.add_protein('AG') + mh.add_protein("AG") assert len(mh.hashes) == 0, mh.hashes @@ -436,14 +468,14 @@ def test_size_limit(track_abundance): mh.add_hash(20) mh.add_hash(30) assert list(sorted(mh.hashes)) == [10, 20, 30] - mh.add_hash(5) # -> should push 30 off end + mh.add_hash(5) # -> should push 30 off end assert list(sorted(mh.hashes)) == [5, 10, 20] def test_scaled(track_abundance): # test behavior with scaled scaled = _get_scaled_for_max_hash(35) - print('XX', scaled, _get_max_hash_for_scaled(scaled)) + print("XX", scaled, _get_max_hash_for_scaled(scaled)) mh = MinHash(0, 4, track_abundance=track_abundance, scaled=scaled) assert mh._max_hash == 35 @@ -461,11 +493,11 @@ def test_scaled(track_abundance): def test_no_scaled(track_abundance): # no 'scaled', num=0 - should fail with pytest.raises(ValueError): - mh = MinHash(0, 4, track_abundance=track_abundance) + MinHash(0, 4, track_abundance=track_abundance) def test_max_hash_conversion(): - SCALED=100000 + SCALED = 100000 max_hash = _get_max_hash_for_scaled(SCALED) new_scaled = _get_scaled_for_max_hash(max_hash) assert new_scaled == SCALED @@ -481,15 +513,15 @@ def test_max_hash_and_scaled_zero(): def test_max_hash_and_scaled_error(track_abundance): # test behavior when supplying both max_hash and scaled with pytest.raises(ValueError): - mh = MinHash(0, 4, track_abundance=track_abundance, max_hash=35, - 
scaled=5) + MinHash(0, 4, track_abundance=track_abundance, max_hash=35, scaled=5) def test_max_hash_cannot_limit(track_abundance): # make sure you can't set both n and scaled. with pytest.raises(ValueError): - mh = MinHash(2, 4, track_abundance=track_abundance, - scaled=_get_scaled_for_max_hash(1)) + MinHash( + 2, 4, track_abundance=track_abundance, scaled=_get_scaled_for_max_hash(1) + ) def test_no_downsample_scaled_if_n(track_abundance): @@ -498,13 +530,13 @@ def test_no_downsample_scaled_if_n(track_abundance): with pytest.raises(ValueError) as excinfo: mh.downsample(scaled=100000000) - assert 'cannot downsample a num MinHash using scaled' in str(excinfo.value) + assert "cannot downsample a num MinHash using scaled" in str(excinfo.value) def test_scaled_num_both(track_abundance): # make sure you can't set both max_n and scaled. with pytest.raises(ValueError): - mh = MinHash(2, 4, track_abundance=track_abundance, scaled=2) + MinHash(2, 4, track_abundance=track_abundance, scaled=2) def test_mh_jaccard_similarity(): @@ -514,7 +546,7 @@ def test_mh_jaccard_similarity(): a.add_many([1, 3, 5, 8]) b.add_many([1, 3, 5, 6, 8, 10]) - assert a.similarity(b) == 4. / 6. + assert a.similarity(b) == 4.0 / 6.0 def test_mh_similarity_downsample_jaccard_value(): @@ -526,10 +558,10 @@ def test_mh_similarity_downsample_jaccard_value(): b = MinHash(0, 20, scaled=scaled100, track_abundance=False) a.add_many([1, 3, 5, 8, 70]) - b.add_many([1, 3, 5, 6, 8, 10, 70 ]) + b.add_many([1, 3, 5, 6, 8, 10, 70]) # the hash=70 will be truncated by downsampling - assert a.similarity(b, downsample=True) == 4. / 6. + assert a.similarity(b, downsample=True) == 4.0 / 6.0 def test_mh_angular_similarity(): @@ -539,11 +571,11 @@ def test_mh_angular_similarity(): # are always positive (https://en.wikipedia.org/wiki/Cosine_similarity) a = MinHash(0, 20, scaled=scaled50, track_abundance=True) b = MinHash(0, 20, scaled=scaled50, track_abundance=True) - a.set_abundances({ 1:5, 3:3, 5:2, 8:2}) - b.set_abundances({ 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 }) + a.set_abundances({1: 5, 3: 3, 5: 2, 8: 2}) + b.set_abundances({1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1}) cos_sim = 0.9356 - angular_sim = 1 - 2*math.acos(cos_sim) / math.pi + angular_sim = 1 - 2 * math.acos(cos_sim) / math.pi assert round(angular_sim, 4) == 0.7703 assert round(a.similarity(b), 4) == round(angular_sim, 4) @@ -553,13 +585,13 @@ def test_mh_angular_similarity_2(): # check actual angular similarity for a second non-trivial case a = MinHash(0, 20, scaled=scaled100, track_abundance=True) b = MinHash(0, 20, scaled=scaled100, track_abundance=True) - a.set_abundances({ 1:5, 3:3, 5:2, 8:2, 70:70 }) - b.set_abundances({ 1:3, 3:2, 5:1, 6:1, 8:1, 10:1, 70:70 }) + a.set_abundances({1: 5, 3: 3, 5: 2, 8: 2, 70: 70}) + b.set_abundances({1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1, 70: 70}) assert round(a.similarity(b), 4) == 0.9728 # ignore_abundance => jaccard - assert a.similarity(b, ignore_abundance=True) == 5. / 7. 
+ assert a.similarity(b, ignore_abundance=True) == 5.0 / 7.0 def test_mh_similarity_downsample_angular_value(): @@ -570,8 +602,8 @@ def test_mh_similarity_downsample_angular_value(): # max_hash = 100 b = MinHash(0, 20, scaled=scaled100, track_abundance=True) - a.set_abundances({ 1:5, 3:3, 5:2, 8:2, 70:70 }) - b.set_abundances({ 1:3, 3:2, 5:1, 6:1, 8:1, 10:1, 70:70 }) + a.set_abundances({1: 5, 3: 3, 5: 2, 8: 2, 70: 70}) + b.set_abundances({1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1, 70: 70}) # the hash=70 will be truncated by downsampling sim = a.similarity(b, downsample=True) @@ -579,15 +611,16 @@ def test_mh_similarity_downsample_angular_value(): # with ignore_abundance, will be equal to jaccard jaccard = a.similarity(b, downsample=True, ignore_abundance=True) - assert jaccard == 4. / 6. + assert jaccard == 4.0 / 6.0 + def test_mh_angular_similarity_fail(): # raise TypeError if calling angular_similarity directly and # one or both sketches do not have abundance info a = MinHash(0, 20, scaled=scaled50, track_abundance=True) b = MinHash(0, 20, scaled=scaled50, track_abundance=False) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} a.set_abundances(a_values) b.add_many(b_values.keys()) @@ -596,14 +629,20 @@ def test_mh_angular_similarity_fail(): with pytest.raises(TypeError) as exc: a.angular_similarity(b) print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." + in str(exc) + ) # both sketches lack track abundance a = MinHash(0, 20, scaled=scaled50, track_abundance=False) a.add_many(a_values.keys()) with pytest.raises(TypeError) as exc: a.angular_similarity(b) print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." 
+ in str(exc) + ) def test_mh_similarity_downsample_true(track_abundance): @@ -614,8 +653,8 @@ def test_mh_similarity_downsample_true(track_abundance): # max_hash = 100 b = MinHash(0, 20, scaled=scaled100, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) b.set_abundances(b_values) @@ -642,8 +681,8 @@ def test_mh_similarity_downsample_errors(track_abundance): # max_hash = 100 b = MinHash(0, 20, scaled=scaled100, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) b.set_abundances(b_values) @@ -653,20 +692,20 @@ def test_mh_similarity_downsample_errors(track_abundance): # error, incompatible max hash with pytest.raises(ValueError) as e: - a.similarity(b, ignore_abundance=True) # downsample=False - assert 'mismatch in scaled; comparison fail' in str(e.value) + a.similarity(b, ignore_abundance=True) # downsample=False + assert "mismatch in scaled; comparison fail" in str(e.value) with pytest.raises(ValueError) as e: a.similarity(b, ignore_abundance=False) # downsample=False - assert 'mismatch in scaled; comparison fail' in str(e.value) + assert "mismatch in scaled; comparison fail" in str(e.value) with pytest.raises(ValueError) as e: - b.similarity(a, ignore_abundance=True) # downsample=False - assert 'mismatch in scaled; comparison fail' in str(e.value) + b.similarity(a, ignore_abundance=True) # downsample=False + assert "mismatch in scaled; comparison fail" in str(e.value) with pytest.raises(ValueError) as e: b.similarity(a, ignore_abundance=False) # downsample=false - assert 'mismatch in scaled; comparison fail' in str(e.value) + assert "mismatch in scaled; comparison fail" in str(e.value) def test_basic_dna_bad(track_abundance): @@ -674,10 +713,10 @@ def test_basic_dna_bad(track_abundance): mh = MinHash(1, 4, track_abundance=track_abundance) with pytest.raises(ValueError) as e: - mh.add_sequence('ATGR') + mh.add_sequence("ATGR") print(e) - assert 'invalid DNA character in input k-mer: ATGR' in str(e.value) + assert "invalid DNA character in input k-mer: ATGR" in str(e.value) def test_basic_dna_bad_2(track_abundance): @@ -685,40 +724,40 @@ def test_basic_dna_bad_2(track_abundance): mh = MinHash(1, 6, track_abundance=track_abundance) with pytest.raises(ValueError): - mh.add_protein('YYYY') + mh.add_protein("YYYY") def test_basic_dna_bad_force(track_abundance): # test behavior on bad DNA; use 100 so multiple hashes get added. mh = MinHash(100, 4, track_abundance=track_abundance) assert len(mh.hashes) == 0 - mh.add_sequence('ATGN', True) # ambiguous kmer skipped. + mh.add_sequence("ATGN", True) # ambiguous kmer skipped. assert len(mh.hashes) == 0 - mh.add_sequence('AATGN', True) # but good k-mers still used. + mh.add_sequence("AATGN", True) # but good k-mers still used. 
assert len(mh.hashes) == 1 - mh.add_sequence('AATG', True) # checking that right kmer was added - assert len(mh.hashes) == 1 # (only 1 hash <- this is a dup) + mh.add_sequence("AATG", True) # checking that right kmer was added + assert len(mh.hashes) == 1 # (only 1 hash <- this is a dup) def test_basic_dna_bad_force_2(track_abundance): # test behavior on bad DNA mh = MinHash(100, 4, track_abundance=track_abundance) assert len(mh.hashes) == 0 - mh.add_sequence('AAGNCGG', True) # ambiguous kmers skipped. + mh.add_sequence("AAGNCGG", True) # ambiguous kmers skipped. assert len(mh.hashes) == 0 - mh.add_sequence('AATGNGCGG', True) # ambiguous kmers skipped. + mh.add_sequence("AATGNGCGG", True) # ambiguous kmers skipped. assert len(mh.hashes) == 2 - mh.add_sequence('AATG', True) # checking that right kmers were added - mh.add_sequence('GCGG', True) - assert len(mh.hashes) == 2 # (only 2 hashes should be there) + mh.add_sequence("AATG", True) # checking that right kmers were added + mh.add_sequence("GCGG", True) + assert len(mh.hashes) == 2 # (only 2 hashes should be there) def test_consume_lowercase(track_abundance): a = MinHash(20, 10, track_abundance=track_abundance) b = MinHash(20, 10, track_abundance=track_abundance) - a.add_sequence('TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA'.lower()) - b.add_sequence('TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA') + a.add_sequence("TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA".lower()) + b.add_sequence("TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA") assert round(a.similarity(b), 3) == 1.0 assert round(b.similarity(b), 3) == 1.0 @@ -730,8 +769,8 @@ def test_similarity_1(track_abundance): a = MinHash(20, 10, track_abundance=track_abundance) b = MinHash(20, 10, track_abundance=track_abundance) - a.add_sequence('TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA') - b.add_sequence('TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA') + a.add_sequence("TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA") + b.add_sequence("TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA") assert round(a.similarity(b), 3) == 1.0 assert round(b.similarity(b), 3) == 1.0 @@ -739,14 +778,13 @@ def test_similarity_1(track_abundance): assert round(a.similarity(a), 3) == 1.0 # add same sequence again - b.add_sequence('TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA') + b.add_sequence("TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA") assert round(a.similarity(b), 3) == 1.0 assert round(b.similarity(b), 3) == 1.0 assert round(b.similarity(a), 3) == 1.0 assert round(a.similarity(a), 3) == 1.0 - - b.add_sequence('GATTGGTGCACACTTAACTGGGTGCCGCGCTGGTGCTGATCCATGAAGTT') + b.add_sequence("GATTGGTGCACACTTAACTGGGTGCCGCGCTGGTGCTGATCCATGAAGTT") x = a.similarity(b) assert x >= 0.3, x @@ -777,7 +815,7 @@ def test_frozen_copy(track_abundance): def test_mh_copy(track_abundance): a = MinHash(20, 10, track_abundance=track_abundance) - a.add_sequence('TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA') + a.add_sequence("TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA") b = a.__copy__() assert round(b.similarity(a), 3) == 1.0 @@ -786,7 +824,7 @@ def test_mh_len(track_abundance): a = MinHash(20, 10, track_abundance=track_abundance) assert len(a) == 0 - a.add_sequence('TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA') + a.add_sequence("TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA") assert len(a) == 20 @@ -800,7 +838,7 @@ def test_mh_len_2(track_abundance): def test_mh_unsigned_long_long(track_abundance): a = MinHash(20, 10, track_abundance=track_abundance) - 
a.add_hash(9227159859419181011) # too big for a C long int. + a.add_hash(9227159859419181011) # too big for a C long int. assert 9227159859419181011 in a.hashes @@ -826,10 +864,20 @@ def test_mh_count_common_diff_protein(track_abundance): def test_mh_count_common_diff_maxhash(track_abundance): - a = MinHash(0, 5, is_protein=False, track_abundance=track_abundance, - scaled=_get_scaled_for_max_hash(1)) - b = MinHash(0, 5, is_protein=True, track_abundance=track_abundance, - scaled=_get_scaled_for_max_hash(2)) + a = MinHash( + 0, + 5, + is_protein=False, + track_abundance=track_abundance, + scaled=_get_scaled_for_max_hash(1), + ) + b = MinHash( + 0, + 5, + is_protein=True, + track_abundance=track_abundance, + scaled=_get_scaled_for_max_hash(2), + ) with pytest.raises(ValueError): a.count_common(b) @@ -991,6 +1039,7 @@ def test_mh_merge_check_length2(track_abundance): c.merge(b) assert len(c.hashes) == 3 + def test_mh_asymmetric_merge(track_abundance): # test merging two asymmetric (different size) MHs a = MinHash(20, 10, track_abundance=track_abundance) @@ -1055,13 +1104,13 @@ def test_mh_inplace_concat_asymmetric(track_abundance): try: d.similarity(a) except TypeError as exc: - assert 'must have same num' in str(exc) + assert "must have same num" in str(exc) a = a.downsample(num=d.num) if track_abundance: - assert round(d.similarity(a), 3) == 0.795 # see: d += a, above. + assert round(d.similarity(a), 3) == 0.795 # see: d += a, above. else: - assert d.similarity(a) == 1.0 # see: d += a, above. + assert d.similarity(a) == 1.0 # see: d += a, above. c = c.downsample(num=b.num) if track_abundance: @@ -1132,11 +1181,13 @@ def test_mh_similarity_diff_seed(track_abundance): def test_mh_compare_diff_max_hash(track_abundance): - a = MinHash(0, 5, track_abundance=track_abundance, - scaled=_get_max_hash_for_scaled(5)) + a = MinHash( + 0, 5, track_abundance=track_abundance, scaled=_get_max_hash_for_scaled(5) + ) - b = MinHash(0, 5, track_abundance=track_abundance, - scaled=_get_max_hash_for_scaled(10)) + b = MinHash( + 0, 5, track_abundance=track_abundance, scaled=_get_max_hash_for_scaled(10) + ) with pytest.raises(ValueError): a.similarity(b) @@ -1159,10 +1210,12 @@ def test_mh_concat_diff_ksize(track_abundance): def test_mh_concat_diff_max_hash(track_abundance): - a = MinHash(0, 5, track_abundance=track_abundance, - scaled=_get_max_hash_for_scaled(5)) - b = MinHash(0, 5, track_abundance=track_abundance, - scaled=_get_max_hash_for_scaled(10)) + a = MinHash( + 0, 5, track_abundance=track_abundance, scaled=_get_max_hash_for_scaled(5) + ) + b = MinHash( + 0, 5, track_abundance=track_abundance, scaled=_get_max_hash_for_scaled(10) + ) with pytest.raises(ValueError): a += b @@ -1178,7 +1231,7 @@ def test_mh_concat_diff_seed(track_abundance): def test_short_sequence(track_abundance): a = MinHash(20, 5, track_abundance=track_abundance) - a.add_sequence('GGGG') + a.add_sequence("GGGG") # adding a short sequence should fail silently assert len(a.hashes) == 0 @@ -1190,7 +1243,7 @@ def test_bytes_murmur(): x = hash_murmur(b"ACG") assert x == 1731421407650554201 - x = hash_murmur(u"ACG") + x = hash_murmur("ACG") assert x == 1731421407650554201 @@ -1214,11 +1267,11 @@ def test_murmur(): def test_abundance_simple(): a = MinHash(20, 5, is_protein=False, track_abundance=True) - a.add_sequence('AAAAA') + a.add_sequence("AAAAA") assert list(a.hashes) == [2110480117637990133] assert a.hashes == {2110480117637990133: 1} - a.add_sequence('AAAAA') + a.add_sequence("AAAAA") assert list(a.hashes) == [2110480117637990133] assert 
a.hashes == {2110480117637990133: 2} @@ -1269,15 +1322,15 @@ def test_abundance_simple_2(): a = MinHash(20, 5, is_protein=False, track_abundance=True) b = MinHash(20, 5, is_protein=False, track_abundance=True) - a.add_sequence('AAAAA') + a.add_sequence("AAAAA") assert list(a.hashes) == [2110480117637990133] assert a.hashes == {2110480117637990133: 1} - a.add_sequence('AAAAA') + a.add_sequence("AAAAA") assert list(a.hashes) == [2110480117637990133] assert a.hashes == {2110480117637990133: 2} - b.add_sequence('AAAAA') + b.add_sequence("AAAAA") assert a.count_common(b) == 1 @@ -1285,13 +1338,13 @@ def test_abundance_count_common(): a = MinHash(20, 5, is_protein=False, track_abundance=True) b = MinHash(20, 5, is_protein=False, track_abundance=False) - a.add_sequence('AAAAA') - a.add_sequence('AAAAA') + a.add_sequence("AAAAA") + a.add_sequence("AAAAA") assert list(a.hashes) == [2110480117637990133] assert a.hashes == {2110480117637990133: 2} - b.add_sequence('AAAAA') - b.add_sequence('GGGGG') + b.add_sequence("AAAAA") + b.add_sequence("GGGGG") assert a.count_common(b) == 1 assert a.count_common(b) == b.count_common(a) @@ -1302,8 +1355,8 @@ def test_abundance_similarity(): a = MinHash(20, 10, track_abundance=True) b = MinHash(20, 10, track_abundance=False) - a.add_sequence('TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA') - b.add_sequence('TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA') + a.add_sequence("TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA") + b.add_sequence("TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA") assert round(a.similarity(b), 3) == 1.0 assert round(b.similarity(b), 3) == 1.0 @@ -1311,13 +1364,13 @@ def test_abundance_similarity(): assert round(a.similarity(a), 3) == 1.0 # add same sequence again - b.add_sequence('TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA') + b.add_sequence("TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA") assert round(a.similarity(b), 3) == 1.0 assert round(b.similarity(b), 3) == 1.0 assert round(b.similarity(a), 3) == 1.0 assert round(a.similarity(a), 3) == 1.0 - b.add_sequence('GATTGGTGCACACTTAACTGGGTGCCGCGCTGGTGCTGATCCATGAAGTT') + b.add_sequence("GATTGGTGCACACTTAACTGGGTGCCGCGCTGGTGCTGATCCATGAAGTT") x = a.similarity(b) assert x >= 0.3, x @@ -1338,9 +1391,7 @@ def test_set_abundance(): def test_set_abundance_2(): datapath = utils.get_test_data("genome-s12.fa.gz.sig") - sig = sourmash.load_one_signature(datapath, - ksize=30, - select_moltype='dna') + sig = sourmash.load_one_signature(datapath, ksize=30, select_moltype="dna") new_mh = sig.minhash.copy_and_clear() mins = sig.minhash.hashes mins = {k: 1 for k in mins} @@ -1377,7 +1428,7 @@ def test_set_abundance_clear_3(): a.add_hash(10) assert a.hashes == {10: 1} - + a.set_abundances({20: 1, 30: 4}, clear=False) assert a.hashes == {10: 1, 20: 1, 30: 4} @@ -1387,32 +1438,34 @@ def test_set_abundance_clear_4(): # the abundances together a = MinHash(20, 5, is_protein=False, track_abundance=True) - a.set_abundances({20: 2, 10: 1}, clear=False) # should also sort the hashes + a.set_abundances({20: 2, 10: 1}, clear=False) # should also sort the hashes assert a.hashes == {10: 1, 20: 2} a.set_abundances({20: 1, 10: 2}, clear=False) assert a.hashes == {10: 3, 20: 3} + def test_clear_abundance_on_zero(): mh = sourmash.minhash.MinHash(n=0, ksize=31, scaled=1, track_abundance=True) - mh.set_abundances({ 1: 5, 2: 3, 3 : 5 }) - mh.set_abundances({ 1: 0 }, clear=False) + mh.set_abundances({1: 5, 2: 3, 3: 5}) + mh.set_abundances({1: 0}, clear=False) assert 1 not in dict(mh.hashes) assert 
dict(mh.hashes)[2] == 3 assert dict(mh.hashes)[3] == 5 assert len(mh) == 2 with pytest.raises(ValueError): - mh.set_abundances({ 2: -1 }) # Test on clear = True + mh.set_abundances({2: -1}) # Test on clear = True with pytest.raises(ValueError): - mh.set_abundances({ 2: -1 }, clear=False) - - assert len(mh) == 2 # Assert that nothing was affected + mh.set_abundances({2: -1}, clear=False) + + assert len(mh) == 2 # Assert that nothing was affected + def test_reset_abundance_initialized(): a = MinHash(1, 4, track_abundance=True) - a.add_sequence('ATGC') + a.add_sequence("ATGC") # If we had a minhash with abundances and drop it, this shouldn't fail. # Convert from Abundance to Regular MinHash @@ -1423,12 +1476,14 @@ def test_reset_abundance_initialized(): def test_set_abundance_initialized(): a = MinHash(1, 4, track_abundance=False) - a.add_sequence('ATGC') + a.add_sequence("ATGC") with pytest.raises(RuntimeError) as e: a.track_abundance = True - assert "Can only set track_abundance=True if the MinHash is empty" in e.value.args[0] + assert ( + "Can only set track_abundance=True if the MinHash is empty" in e.value.args[0] + ) def test_set_abundance_num(): @@ -1459,8 +1514,9 @@ def test_mh_copy_and_clear(track_abundance): def test_mh_copy_and_clear_with_max_hash(track_abundance): # test basic creation of new, empty MinHash w/max_hash param set - a = MinHash(0, 10, track_abundance=track_abundance, - scaled=_get_scaled_for_max_hash(20)) + a = MinHash( + 0, 10, track_abundance=track_abundance, scaled=_get_scaled_for_max_hash(20) + ) for i in range(0, 40, 2): a.add_hash(i) @@ -1484,8 +1540,13 @@ def test_scaled_property(track_abundance): def test_pickle_protein(track_abundance): # check that protein/etc ksize is handled properly during serialization. - a = MinHash(0, 10, track_abundance=track_abundance, is_protein=True, - scaled=_get_scaled_for_max_hash(20)) + a = MinHash( + 0, + 10, + track_abundance=track_abundance, + is_protein=True, + scaled=_get_scaled_for_max_hash(20), + ) for i in range(0, 40, 2): a.add_hash(i) @@ -1505,8 +1566,13 @@ def test_pickle_protein(track_abundance): def test_pickle_dayhoff(track_abundance): # check that dayhoff ksize is handled properly during serialization. - a = MinHash(0, 10, track_abundance=track_abundance, dayhoff=True, - scaled=_get_scaled_for_max_hash(20)) + a = MinHash( + 0, + 10, + track_abundance=track_abundance, + dayhoff=True, + scaled=_get_scaled_for_max_hash(20), + ) for i in range(0, 40, 2): a.add_hash(i) @@ -1526,8 +1592,13 @@ def test_pickle_dayhoff(track_abundance): def test_pickle_hp(track_abundance): # check that hp ksize is handled properly during serialization. 
- a = MinHash(0, 10, track_abundance=track_abundance, hp=True, - scaled=_get_scaled_for_max_hash(20)) + a = MinHash( + 0, + 10, + track_abundance=track_abundance, + hp=True, + scaled=_get_scaled_for_max_hash(20), + ) for i in range(0, 40, 2): a.add_hash(i) @@ -1546,8 +1617,9 @@ def test_pickle_hp(track_abundance): def test_pickle_max_hash(track_abundance): - a = MinHash(0, 10, track_abundance=track_abundance, - scaled=_get_scaled_for_max_hash(20)) + a = MinHash( + 0, 10, track_abundance=track_abundance, scaled=_get_scaled_for_max_hash(20) + ) for i in range(0, 40, 2): a.add_hash(i) @@ -1650,8 +1722,9 @@ def test_minhash_abund_merge_flat_2(): def test_distance_matrix(track_abundance): import numpy - siglist = [next(signature.load_signatures(utils.get_test_data(f))) - for f in utils.SIG_FILES] + siglist = [ + next(signature.load_signatures(utils.get_test_data(f))) for f in utils.SIG_FILES + ] D1 = numpy.zeros([len(siglist), len(siglist)]) D2 = numpy.zeros([len(siglist), len(siglist)]) @@ -1690,14 +1763,15 @@ def test_remove_many(track_abundance): assert len(a) == 33 assert all(c % 6 != 0 for c in a.hashes) + def test_remove_minhash(track_abundance): original_mh = MinHash(0, 10, track_abundance=track_abundance, scaled=scaled5000) added_mh = MinHash(0, 10, track_abundance=track_abundance, scaled=scaled5000) tested_mh = MinHash(0, 10, track_abundance=track_abundance, scaled=scaled5000) original_mh.add_many(list(range(101))) - added_mh.add_many(list(range(101,201))) # contains original in it - tested_mh.add_many(list(range(201))) # original + added + added_mh.add_many(list(range(101, 201))) # contains original in it + tested_mh.add_many(list(range(201))) # original + added # Now we should expect tested_minhash == original_minhash # Note we are passing a MinHash object instead of an iterable object @@ -1718,7 +1792,7 @@ def test_add_many(track_abundance): b = MinHash(0, 10, track_abundance=track_abundance, scaled=scaled5000) a.add_many(list(range(0, 100, 2))) - a.add_many(list(range(0, 100, 2))) # => abundance = 2 + a.add_many(list(range(0, 100, 2))) # => abundance = 2 assert len(a) == 50 assert all(c % 2 == 0 for c in a.hashes) @@ -1733,8 +1807,7 @@ def test_add_many(track_abundance): def test_set_abundances_huge(): max_hash = 4000000 - a = MinHash(0, 10, track_abundance=True, - scaled=_get_scaled_for_max_hash(max_hash)) + a = MinHash(0, 10, track_abundance=True, scaled=_get_scaled_for_max_hash(max_hash)) hashes = list(range(max_hash)) abundances = itertools.repeat(2) @@ -1744,7 +1817,7 @@ def test_set_abundances_huge(): def test_try_change_hashes(track_abundance): a = MinHash(0, 10, track_abundance=track_abundance, scaled=scaled5000) - b = MinHash(0, 10, track_abundance=track_abundance, scaled=scaled5000) + MinHash(0, 10, track_abundance=track_abundance, scaled=scaled5000) a.add_many(list(range(0, 100, 2))) @@ -1846,7 +1919,10 @@ def test_inflate_error(): with pytest.raises(ValueError) as exc: mh = mh.inflate(mh2) - assert "inflate operates on a flat MinHash and takes a MinHash object with track_abundance=True" in str(exc.value) + assert ( + "inflate operates on a flat MinHash and takes a MinHash object with track_abundance=True" + in str(exc.value) + ) def test_inflate_not_a_subset(): @@ -1878,7 +1954,7 @@ def test_inflate_not_a_subset(): mh3 = mh.inflate(mh2) assert mh3.hashes[10] == 3 - assert 20 not in mh3.hashes # should intersect, in this case. + assert 20 not in mh3.hashes # should intersect, in this case. 
assert mh3.hashes[30] == 3 @@ -1887,14 +1963,14 @@ def test_add_kmer(track_abundance): mh1 = MinHash(0, 4, scaled=1, track_abundance=track_abundance) mh2 = MinHash(0, 4, scaled=1, track_abundance=track_abundance) - mh1.add_sequence('ATGCGTGC') + mh1.add_sequence("ATGCGTGC") a = mh1.hashes - mh2.add_kmer('ATGC') - mh2.add_kmer('TGCG') - mh2.add_kmer('GCGT') - mh2.add_kmer('CGTG') - mh2.add_kmer('GTGC') + mh2.add_kmer("ATGC") + mh2.add_kmer("TGCG") + mh2.add_kmer("GCGT") + mh2.add_kmer("CGTG") + mh2.add_kmer("GTGC") b = mh2.hashes assert set(a.items()) == set(b.items()) @@ -1905,7 +1981,7 @@ def test_add_kmer_too_long(track_abundance): mh1 = MinHash(0, 4, scaled=1, track_abundance=track_abundance) with pytest.raises(ValueError): - mh1.add_kmer('ATGCGTGC') + mh1.add_kmer("ATGCGTGC") def test_get_mins_deprecated(track_abundance): @@ -1961,9 +2037,14 @@ def test_downsample_scaled(track_abundance): # test downsample(scaled...) method mh = MinHash(0, 21, scaled=1, track_abundance=track_abundance) - mins = (1, 2, 3, - 9223372036854775808 + 1, 9223372036854775808 + 2, - 9223372036854775808 + 3) + mins = ( + 1, + 2, + 3, + 9223372036854775808 + 1, + 9223372036854775808 + 2, + 9223372036854775808 + 3, + ) mh.add_many(mins) assert len(mh) == 6 @@ -1978,7 +2059,7 @@ def test_downsample_scaled(track_abundance): def test_is_molecule_type_1(track_abundance): mh = MinHash(1, 21, track_abundance=track_abundance) - assert mh.moltype == 'DNA' + assert mh.moltype == "DNA" assert mh.is_dna assert not mh.is_protein assert not mh.hp @@ -1987,7 +2068,7 @@ def test_is_molecule_type_1(track_abundance): def test_is_molecule_type_2(track_abundance): mh = MinHash(1, 21, track_abundance=track_abundance, is_protein=True) - assert mh.moltype == 'protein' + assert mh.moltype == "protein" assert not mh.is_dna assert mh.is_protein assert not mh.hp @@ -1996,17 +2077,16 @@ def test_is_molecule_type_2(track_abundance): def test_is_molecule_type_3(track_abundance): mh = MinHash(1, 21, track_abundance=track_abundance, hp=True) - assert mh.moltype == 'hp' + assert mh.moltype == "hp" assert not mh.is_dna assert not mh.is_protein assert mh.hp assert not mh.dayhoff - def test_is_molecule_type_4(track_abundance): mh = MinHash(1, 21, track_abundance=track_abundance, dayhoff=True) - assert mh.moltype == 'dayhoff' + assert mh.moltype == "dayhoff" assert not mh.is_dna assert not mh.is_protein assert not mh.hp @@ -2021,7 +2101,7 @@ def test_addition_num_incompatible(): mh2.add_hash(1) with pytest.raises(TypeError) as exc: - mh3 = mh1 + mh2 + mh1 + mh2 assert "incompatible num values: self=10 other=20" in str(exc.value) @@ -2030,8 +2110,8 @@ def test_addition_abund(): mh1 = MinHash(10, 21, track_abundance=True) mh2 = MinHash(10, 21, track_abundance=True) - mh1.set_abundances({ 0: 1 }) - mh2.set_abundances({ 0: 3 }) + mh1.set_abundances({0: 1}) + mh2.set_abundances({0: 3}) mh3 = mh1 + mh2 hashcounts = mh3.hashes @@ -2057,8 +2137,8 @@ def test_iaddition_abund(): mh1 = MinHash(10, 21, track_abundance=True) mh2 = MinHash(10, 21, track_abundance=True) - mh1.set_abundances({ 0: 1 }) - mh2.set_abundances({ 0: 3 }) + mh1.set_abundances({0: 1}) + mh2.set_abundances({0: 3}) mh1 += mh2 hashcounts = mh1.hashes @@ -2093,10 +2173,11 @@ def test_intersection_1_num(): mh2.add_hash(2) mh3 = mh1.intersection(mh2) - print("mh.intersection INTERSECTION HASHES:",set(mh3.hashes)) + print("mh.intersection INTERSECTION HASHES:", set(mh3.hashes)) assert len(mh3) == 1 assert 0 in mh3.hashes + def test_and_operator(): mh1 = MinHash(20, 21) mh1.add_hash(5) @@ -2110,11 
+2191,14 @@ def test_and_operator(): mh3 = mh1.intersection(mh2) mh4 = mh1 & mh2 - print("\n Intersection hashes (mh3): ", mh3.hashes, "\n '&' hashes: (mh4)", mh4.hashes) + print( + "\n Intersection hashes (mh3): ", mh3.hashes, "\n '&' hashes: (mh4)", mh4.hashes + ) assert mh3 assert mh3 == mh4 + def test_intersection_2_scaled(): mh1 = MinHash(0, 21, scaled=1) mh2 = MinHash(0, 21, scaled=1) @@ -2136,7 +2220,7 @@ def test_intersection_3_abundance_error(): mh2 = MinHash(0, 21, scaled=1, track_abundance=True) with pytest.raises(TypeError) as exc: - mh3 = mh1.intersection(mh2) + mh1.intersection(mh2) assert str(exc.value) == "can only intersect flat MinHash objects" @@ -2147,7 +2231,7 @@ def test_intersection_4_incompatible_ksize(): mh2 = MinHash(500, 31) with pytest.raises(ValueError) as exc: - mh3 = mh1.intersection(mh2) + mh1.intersection(mh2) assert str(exc.value) == "different ksizes cannot be compared" @@ -2157,7 +2241,7 @@ def test_intersection_5_incompatible(): mh1 = MinHash(0, 21, scaled=1) with pytest.raises(TypeError) as exc: - mh3 = mh1.intersection(set()) + mh1.intersection(set()) assert str(exc.value) == "can only intersect MinHash objects" @@ -2189,6 +2273,7 @@ def test_intersection_6_full_num(): assert mh1.intersection_and_union_size(mh2) == (10, 20) + def test_intersection_7_full_scaled(): # intersection of two scaled objects is correct mh1 = MinHash(0, 21, scaled=100) @@ -2231,8 +2316,8 @@ def test_merge_abund(): mh1 = MinHash(10, 21, track_abundance=True) mh2 = MinHash(10, 21, track_abundance=True) - mh1.set_abundances({ 0: 1 }) - mh2.set_abundances({ 0: 3 }) + mh1.set_abundances({0: 1}) + mh2.set_abundances({0: 3}) ret = mh1.merge(mh2) assert ret is None @@ -2315,6 +2400,7 @@ def test_merge_scaled(): for k in mh2.hashes: assert k in mh3.hashes + def test_add_is_symmetric(): mh1 = MinHash(20, 21) mh1.add_hash(5) @@ -2324,10 +2410,11 @@ def test_add_is_symmetric(): mh3 = mh1 + mh2 mh4 = mh2 + mh1 print("\n mh3 EQUALS ", mh3.hashes, "\n mh4 EQUALS", mh4.hashes) - #if mh3 != 0, then it is "true", so it passes + # if mh3 != 0, then it is "true", so it passes assert mh3 assert mh3 == mh4 + def test_or_equals_add(): mh1 = MinHash(20, 21) mh1.add_hash(5) @@ -2340,6 +2427,7 @@ def test_or_equals_add(): assert mh3 assert mh3 == mh4 + def test_max_containment(): mh1 = MinHash(0, 21, scaled=1, track_abundance=False) mh2 = MinHash(0, 21, scaled=1, track_abundance=False) @@ -2347,10 +2435,10 @@ def test_max_containment(): mh1.add_many((1, 2, 3, 4)) mh2.add_many((1, 5)) - assert mh1.contained_by(mh2) == 1/4 - assert mh2.contained_by(mh1) == 1/2 - assert mh1.max_containment(mh2) == 1/2 - assert mh2.max_containment(mh1) == 1/2 + assert mh1.contained_by(mh2) == 1 / 4 + assert mh2.contained_by(mh1) == 1 / 2 + assert mh1.max_containment(mh2) == 1 / 2 + assert mh2.max_containment(mh1) == 1 / 2 def test_max_containment_empty(): @@ -2385,8 +2473,8 @@ def test_avg_containment(): mh1.add_many((1, 2, 3, 4)) mh2.add_many((1, 5)) - assert mh1.contained_by(mh2) == 1/4 - assert mh2.contained_by(mh1) == 1/2 + assert mh1.contained_by(mh2) == 1 / 4 + assert mh2.contained_by(mh1) == 1 / 2 assert mh1.avg_containment(mh2) == 0.375 assert mh2.avg_containment(mh1) == 0.375 @@ -2454,7 +2542,7 @@ def test_frozen_and_mutable_3(track_abundance): def test_dna_kmers(): # test seq_to_hashes for dna -> dna - mh = MinHash(0, ksize=31, scaled=1) # DNA + mh = MinHash(0, ksize=31, scaled=1) # DNA seq = 
"ATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCTGAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGAT" # first calculate seq to hashes @@ -2469,7 +2557,7 @@ def test_dna_kmers(): # k-mer by k-mer? for i in range(0, len(seq) - 31 + 1): # calculate each k-mer - kmer = seq[i:i+31] + kmer = seq[i : i + 31] # add to minhash obj single_mh = mh.copy_and_clear() @@ -2488,7 +2576,7 @@ def test_dna_kmers(): def test_dna_kmers_2(): # test kmers_and_hashes for dna -> dna - mh = MinHash(0, ksize=31, scaled=1) # DNA + mh = MinHash(0, ksize=31, scaled=1) # DNA seq = "ATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCTGAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGAT" # k-mer by k-mer? @@ -2504,7 +2592,7 @@ def test_dna_kmers_2(): def test_dna_kmers_3_bad_dna(): # test kmers_and_hashes for dna -> dna, with some bad k-mers in there - mh = MinHash(0, ksize=31, scaled=1) # DNA + mh = MinHash(0, ksize=31, scaled=1) # DNA seq = "NTGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCTGAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGAT" with pytest.raises(ValueError) as exc: @@ -2515,7 +2603,7 @@ def test_dna_kmers_3_bad_dna(): def test_dna_kmers_4_bad_dna(): # test kmers_and_hashes for bad dna -> dna, using force - mh = MinHash(0, ksize=31, scaled=1) # DNA + mh = MinHash(0, ksize=31, scaled=1) # DNA seq = "NTGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCTGAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGAT" # k-mer by k-mer? @@ -2524,8 +2612,8 @@ def test_dna_kmers_4_bad_dna(): # add to minhash obj single_mh = mh.copy_and_clear() - if hashval == None: - assert kmer == seq[:31] # first k-mer is baaaaad. + if hashval is None: + assert kmer == seq[:31] # first k-mer is baaaaad. 
found_bad_kmer = True continue @@ -2555,7 +2643,7 @@ def test_protein_kmers(): # k-mer by k-mer? for i in range(0, len(seq) - 7 + 1): # calculate each k-mer - kmer = seq[i:i+7] + kmer = seq[i : i + 7] # add to minhash obj single_mh = mh.copy_and_clear() @@ -2605,7 +2693,7 @@ def test_dayhoff_kmers(): # k-mer by k-mer? for i in range(0, len(seq) - 7 + 1): # calculate each k-mer - kmer = seq[i:i+7] + kmer = seq[i : i + 7] # add to minhash obj single_mh = mh.copy_and_clear() @@ -2655,7 +2743,7 @@ def test_hp_kmers(): # k-mer by k-mer? for i in range(0, len(seq) - 7 + 1): # calculate each k-mer - kmer = seq[i:i+7] + kmer = seq[i : i + 7] # add to minhash obj single_mh = mh.copy_and_clear() @@ -2789,8 +2877,8 @@ def test_containment(track_abundance): mh2.add_many((1, 5)) mh2.add_many((1, 5)) - assert mh1.contained_by(mh2) == 1/4 - assert mh2.contained_by(mh1) == 1/2 + assert mh1.contained_by(mh2) == 1 / 4 + assert mh2.contained_by(mh1) == 1 / 2 def test_sum_abundances(track_abundance): @@ -2808,8 +2896,8 @@ def test_sum_abundances(track_abundance): assert mh1.sum_abundances == 6 assert mh2.sum_abundances == 6 else: - assert mh1.sum_abundances == None - assert mh2.sum_abundances == None + assert mh1.sum_abundances is None + assert mh2.sum_abundances is None def test_mean_abundance(track_abundance): @@ -2885,32 +2973,44 @@ def test_unique_dataset_hashes(track_abundance): def test_containment_ANI(): - f1 = utils.get_test_data('2.fa.sig') - f2 = utils.get_test_data('2+63.fa.sig') + f1 = utils.get_test_data("2.fa.sig") + f2 = utils.get_test_data("2+63.fa.sig") mh1 = sourmash.load_one_signature(f1, ksize=31).minhash mh2 = sourmash.load_one_signature(f2, ksize=31).minhash - m1_cont_m2 = mh1.containment_ani(mh2, estimate_ci =True) - m2_cont_m1 = mh2.containment_ani(mh1, estimate_ci =True) + m1_cont_m2 = mh1.containment_ani(mh2, estimate_ci=True) + m2_cont_m1 = mh2.containment_ani(mh1, estimate_ci=True) print("\nmh1 contained by mh2", m1_cont_m2) print("mh2 contained by mh1", m2_cont_m1) - assert (round(m1_cont_m2.ani,3), m1_cont_m2.ani_low, m1_cont_m2.ani_high) == (1.0, 1.0, 1.0) - assert (round(m2_cont_m1.ani,3), round(m2_cont_m1.ani_low,3), round(m2_cont_m1.ani_high,3)) == (0.966, 0.965, 0.967) - - m1_mc_m2 = mh1.max_containment_ani(mh2, estimate_ci =True) - m2_mc_m1 = mh2.max_containment_ani(mh1, estimate_ci =True) + assert (round(m1_cont_m2.ani, 3), m1_cont_m2.ani_low, m1_cont_m2.ani_high) == ( + 1.0, + 1.0, + 1.0, + ) + assert ( + round(m2_cont_m1.ani, 3), + round(m2_cont_m1.ani_low, 3), + round(m2_cont_m1.ani_high, 3), + ) == (0.966, 0.965, 0.967) + + m1_mc_m2 = mh1.max_containment_ani(mh2, estimate_ci=True) + m2_mc_m1 = mh2.max_containment_ani(mh1, estimate_ci=True) print("mh1 max containment", m1_mc_m2) print("mh2 max containment", m2_mc_m1) m1_mc_m2.size_is_inaccurate = False m2_mc_m1.size_is_inaccurate = False assert m1_mc_m2 == m2_mc_m1 - assert (round(m1_mc_m2.ani, 3), round(m1_mc_m2.ani_low, 3), round(m1_mc_m2.ani_high, 3)) == (1.0,1.0,1.0) + assert ( + round(m1_mc_m2.ani, 3), + round(m1_mc_m2.ani_low, 3), + round(m1_mc_m2.ani_high, 3), + ) == (1.0, 1.0, 1.0) def test_containment_ANI_precalc_containment(): - f1 = utils.get_test_data('47+63.fa.sig') - f2 = utils.get_test_data('2+63.fa.sig') + f1 = utils.get_test_data("47+63.fa.sig") + f2 = utils.get_test_data("2+63.fa.sig") mh1 = sourmash.load_one_signature(f1, ksize=31).minhash mh2 = sourmash.load_one_signature(f2, ksize=31).minhash # precalc containments and assert same results @@ -2918,27 +3018,37 @@ def 
test_containment_ANI_precalc_containment(): s2c = mh2.contained_by(mh1) mc = max(s1c, s2c) - assert mh1.containment_ani(mh2, estimate_ci=True) == mh1.containment_ani(mh2, containment=s1c, estimate_ci=True) - assert mh2.containment_ani(mh1) == mh2.containment_ani(mh1, containment=s2c) - assert mh1.max_containment_ani(mh2) == mh2.max_containment_ani(mh1) - assert mh1.max_containment_ani(mh2) == mh1.max_containment_ani(mh2, max_containment=mc) - assert mh1.max_containment_ani(mh2) == mh2.max_containment_ani(mh1, max_containment=mc) + assert mh1.containment_ani(mh2, estimate_ci=True) == mh1.containment_ani( + mh2, containment=s1c, estimate_ci=True + ) + assert mh2.containment_ani(mh1) == mh2.containment_ani(mh1, containment=s2c) + assert mh1.max_containment_ani(mh2) == mh2.max_containment_ani(mh1) + assert mh1.max_containment_ani(mh2) == mh1.max_containment_ani( + mh2, max_containment=mc + ) + assert mh1.max_containment_ani(mh2) == mh2.max_containment_ani( + mh1, max_containment=mc + ) def test_avg_containment_ani(): - f1 = utils.get_test_data('47+63.fa.sig') - f2 = utils.get_test_data('2+63.fa.sig') + f1 = utils.get_test_data("47+63.fa.sig") + f2 = utils.get_test_data("2+63.fa.sig") mh1 = sourmash.load_one_signature(f1, ksize=31).minhash mh2 = sourmash.load_one_signature(f2, ksize=31).minhash # check average_containment_ani ac_m1 = mh1.avg_containment_ani(mh2) ac_m2 = mh2.avg_containment_ani(mh1) - assert ac_m1 == ac_m2 == (mh1.containment_ani(mh2).ani + mh2.containment_ani(mh1).ani)/2 + assert ( + ac_m1 + == ac_m2 + == (mh1.containment_ani(mh2).ani + mh2.containment_ani(mh1).ani) / 2 + ) def test_containment_ANI_downsample(): - f2 = utils.get_test_data('2+63.fa.sig') - f3 = utils.get_test_data('47+63.fa.sig') + f2 = utils.get_test_data("2+63.fa.sig") + f3 = utils.get_test_data("47+63.fa.sig") mh2 = sourmash.load_one_signature(f2, ksize=31).minhash mh3 = sourmash.load_one_signature(f3, ksize=31).minhash # check that downsampling works properly @@ -2947,8 +3057,8 @@ def test_containment_ANI_downsample(): assert mh2.scaled != mh3.scaled ds_s3c = mh2.containment_ani(mh3, downsample=True) ds_s4c = mh3.containment_ani(mh2, downsample=True) - mc_w_ds_1 = mh2.max_containment_ani(mh3, downsample=True) - mc_w_ds_2 = mh3.max_containment_ani(mh2, downsample=True) + mc_w_ds_1 = mh2.max_containment_ani(mh3, downsample=True) + mc_w_ds_2 = mh3.max_containment_ani(mh2, downsample=True) print(ds_s3c) with pytest.raises(ValueError) as e: mh2.containment_ani(mh3) @@ -2962,19 +3072,19 @@ def test_containment_ANI_downsample(): assert mh2.scaled == mh3.scaled ds_s3c_manual = mh2.containment_ani(mh3) ds_s4c_manual = mh3.containment_ani(mh2) - ds_mc_manual = mh2.max_containment_ani(mh3) + ds_mc_manual = mh2.max_containment_ani(mh3) assert ds_s3c == ds_s3c_manual assert ds_s4c == ds_s4c_manual assert mc_w_ds_1 == mc_w_ds_2 == ds_mc_manual ac_m2 = mh2.avg_containment_ani(mh3) ac_m3 = mh3.avg_containment_ani(mh2) - assert ac_m2 == ac_m3 == (ds_s3c.ani + ds_s4c.ani)/2 + assert ac_m2 == ac_m3 == (ds_s3c.ani + ds_s4c.ani) / 2 def test_jaccard_ANI(): - f1 = utils.get_test_data('2.fa.sig') - f2 = utils.get_test_data('2+63.fa.sig') + f1 = utils.get_test_data("2.fa.sig") + f2 = utils.get_test_data("2+63.fa.sig") mh1 = sourmash.load_one_signature(f1, ksize=31).minhash mh2 = sourmash.load_one_signature(f2).minhash @@ -2984,12 +3094,16 @@ def test_jaccard_ANI(): m2_jani_m1 = mh2.jaccard_ani(mh1) assert m1_jani_m2 == m2_jani_m1 - assert (m1_jani_m2.ani, m1_jani_m2.p_nothing_in_common, m1_jani_m2.jaccard_error) == 
(0.9783711630110239, 0.0, 3.891666770716877e-07) + assert ( + m1_jani_m2.ani, + m1_jani_m2.p_nothing_in_common, + m1_jani_m2.jaccard_error, + ) == (0.9783711630110239, 0.0, 3.891666770716877e-07) def test_jaccard_ANI_untrustworthy(): - f1 = utils.get_test_data('2.fa.sig') - f2 = utils.get_test_data('2+63.fa.sig') + f1 = utils.get_test_data("2.fa.sig") + f2 = utils.get_test_data("2+63.fa.sig") mh1 = sourmash.load_one_signature(f1, ksize=31).minhash mh2 = sourmash.load_one_signature(f2).minhash @@ -3000,28 +3114,32 @@ def test_jaccard_ANI_untrustworthy(): # since size is inaccurate on 2.fa.sig, need to override to be able to get ani m1_jani_m2.size_is_inaccurate = False - assert m1_jani_m2.ani == None - assert m1_jani_m2.je_exceeds_threshold==True + assert m1_jani_m2.ani is None + assert m1_jani_m2.je_exceeds_threshold == True assert m1_jani_m2.je_threshold == 1e-7 def test_jaccard_ANI_precalc_jaccard(): - f1 = utils.get_test_data('2.fa.sig') - f2 = utils.get_test_data('2+63.fa.sig') + f1 = utils.get_test_data("2.fa.sig") + f2 = utils.get_test_data("2+63.fa.sig") mh1 = sourmash.load_one_signature(f1, ksize=31).minhash mh2 = sourmash.load_one_signature(f2).minhash # precalc jaccard and assert same result jaccard = mh1.jaccard(mh2) - print("\nJACCARD_ANI", mh1.jaccard_ani(mh2,jaccard=jaccard)) + print("\nJACCARD_ANI", mh1.jaccard_ani(mh2, jaccard=jaccard)) - assert mh1.jaccard_ani(mh2) == mh1.jaccard_ani(mh2, jaccard=jaccard) == mh2.jaccard_ani(mh1, jaccard=jaccard) + assert ( + mh1.jaccard_ani(mh2) + == mh1.jaccard_ani(mh2, jaccard=jaccard) + == mh2.jaccard_ani(mh1, jaccard=jaccard) + ) wrong_jaccard = jaccard - 0.1 assert mh1.jaccard_ani(mh2) != mh1.jaccard_ani(mh2, jaccard=wrong_jaccard) def test_jaccard_ANI_downsample(): - f1 = utils.get_test_data('2.fa.sig') - f2 = utils.get_test_data('2+63.fa.sig') + f1 = utils.get_test_data("2.fa.sig") + f2 = utils.get_test_data("2+63.fa.sig") mh1 = sourmash.load_one_signature(f1, ksize=31).minhash mh2 = sourmash.load_one_signature(f2).minhash @@ -3058,13 +3176,13 @@ def test_containment_ani_ci_tiny_testdata(): # from the formula ANI = c^(1/k) for c=3/4 and k=21 np.testing.assert_almost_equal(m2_cani_m1.ani, 0.986394259982259, decimal=3) m2_cani_m1.size_is_inaccurate = False - assert m2_cani_m1.ani_low == None - assert m2_cani_m1.ani_high == None + assert m2_cani_m1.ani_low is None + assert m2_cani_m1.ani_high is None def test_containment_num_fail(): - f1 = utils.get_test_data('num/47.fa.sig') - f2 = utils.get_test_data('num/63.fa.sig') + f1 = utils.get_test_data("num/47.fa.sig") + f2 = utils.get_test_data("num/63.fa.sig") mh1 = sourmash.load_one_signature(f1, ksize=31).minhash mh2 = sourmash.load_one_signature(f2, ksize=31).minhash @@ -3081,8 +3199,8 @@ def test_containment_num_fail(): def test_ANI_num_fail(): - f1 = utils.get_test_data('num/47.fa.sig') - f2 = utils.get_test_data('num/63.fa.sig') + f1 = utils.get_test_data("num/47.fa.sig") + f2 = utils.get_test_data("num/63.fa.sig") mh1 = sourmash.load_one_signature(f1, ksize=31).minhash mh2 = sourmash.load_one_signature(f2, ksize=31).minhash @@ -3091,7 +3209,7 @@ def test_ANI_num_fail(): print(str(exc)) assert "Error: can only calculate ANI for scaled MinHashes" in str(exc) with pytest.raises(TypeError) as exc: - mh2.containment_ani(mh1, estimate_ci =True) + mh2.containment_ani(mh1, estimate_ci=True) assert "Error: can only calculate ANI for scaled MinHashes" in str(exc) with pytest.raises(TypeError) as exc: mh1.max_containment_ani(mh2) @@ -3105,8 +3223,8 @@ def test_ANI_num_fail(): def 
test_minhash_set_size_estimate_is_accurate(): - f1 = utils.get_test_data('2.fa.sig') - f2 = utils.get_test_data('2+63.fa.sig') + f1 = utils.get_test_data("2.fa.sig") + f2 = utils.get_test_data("2+63.fa.sig") mh1 = sourmash.load_one_signature(f1, ksize=31).minhash mh2 = sourmash.load_one_signature(f2).minhash mh1_ds = mh1.downsample(scaled=100000) @@ -3126,22 +3244,31 @@ def test_minhash_set_size_estimate_is_accurate(): # check that relative error and confidence must be between 0 and 1 with pytest.raises(ValueError) as exc: mh2.size_is_accurate(relative_error=-1) - assert "Error: relative error and confidence values must be between 0 and 1." in str(exc) + assert ( + "Error: relative error and confidence values must be between 0 and 1." + in str(exc) + ) with pytest.raises(ValueError) as exc: mh2.size_is_accurate(confidence=-1) - assert "Error: relative error and confidence values must be between 0 and 1." in str(exc) + assert ( + "Error: relative error and confidence values must be between 0 and 1." + in str(exc) + ) with pytest.raises(ValueError) as exc: mh2.size_is_accurate(relative_error=-1, confidence=-1) - assert "Error: relative error and confidence values must be between 0 and 1." in str(exc) + assert ( + "Error: relative error and confidence values must be between 0 and 1." + in str(exc) + ) def test_minhash_ani_inaccurate_size_est(): # TODO: It's actually really tricky to get the set size to be inaccurate. Eg. For a scale factor of 10000, # you would need - f1 = utils.get_test_data('2.fa.sig') - f2 = utils.get_test_data('2+63.fa.sig') + f1 = utils.get_test_data("2.fa.sig") + f2 = utils.get_test_data("2+63.fa.sig") mh1 = sourmash.load_one_signature(f1, ksize=31).minhash mh2 = sourmash.load_one_signature(f2).minhash # downsample @@ -3160,12 +3287,12 @@ def test_minhash_ani_inaccurate_size_est(): m1_ca_m2_ds = mh1_ds.containment_ani(mh2_ds) print(m1_ca_m2_ds) - assert m1_ca_m2_ds.ani == None #0.987 + assert m1_ca_m2_ds.ani is None # 0.987 assert m1_ca_m2_ds.size_is_inaccurate == True def test_size_num_fail(): - f1 = utils.get_test_data('num/47.fa.sig') + f1 = utils.get_test_data("num/47.fa.sig") mh1 = sourmash.load_one_signature(f1, ksize=31).minhash with pytest.raises(TypeError) as exc: diff --git a/tests/test_nodegraph.py b/tests/test_nodegraph.py index 68283dd620..bc9e02754b 100644 --- a/tests/test_nodegraph.py +++ b/tests/test_nodegraph.py @@ -2,15 +2,19 @@ import pytest -from sourmash.nodegraph import Nodegraph, extract_nodegraph_info, calc_expected_collisions +from sourmash.nodegraph import ( + Nodegraph, + extract_nodegraph_info, + calc_expected_collisions, +) import sourmash_tst_utils as utils def test_nodegraph_to_khmer_basic(): - pytest.importorskip('khmer') + pytest.importorskip("khmer") - ng_file = utils.get_test_data('.sbt.v3/internal.0') + ng_file = utils.get_test_data(".sbt.v3/internal.0") sourmash_ng = Nodegraph.load(ng_file) khmer_sm_ng = sourmash_ng.to_khmer_nodegraph() @@ -19,7 +23,7 @@ def test_nodegraph_to_khmer_basic(): def test_nodegraph_khmer_compare(): - khmer = pytest.importorskip('khmer') + khmer = pytest.importorskip("khmer") khmer_ng = khmer.Nodegraph(3, 23, 6) khmer_ng.count("ACG") @@ -43,14 +47,14 @@ def test_nodegraph_khmer_compare(): def test_nodegraph_same_file(): - khmer = pytest.importorskip('khmer') + khmer = pytest.importorskip("khmer") try: load_nodegraph = khmer.load_nodegraph except AttributeError: load_nodegraph = khmer.Nodegraph.load - ng_file = utils.get_test_data('.sbt.v3/internal.0') - with open(ng_file, 'rb') as f: + ng_file = 
utils.get_test_data(".sbt.v3/internal.0") + with open(ng_file, "rb") as f: ng_data = f.read() sourmash_ng = Nodegraph.load(ng_file) @@ -85,7 +89,7 @@ def test_nodegraph_same_file(): def test_nodegraph_expected_collisions(): - ng_file = utils.get_test_data('.sbt.v3/internal.0') + ng_file = utils.get_test_data(".sbt.v3/internal.0") sourmash_ng = Nodegraph.load(ng_file) @@ -93,7 +97,7 @@ def test_nodegraph_expected_collisions(): def test_nodegraph_expected_collisions_error(): - ng_file = utils.get_test_data('.sbt.v3/internal.0') + ng_file = utils.get_test_data(".sbt.v3/internal.0") sourmash_ng = Nodegraph.load(ng_file) diff --git a/tests/test_np_utils.py b/tests/test_np_utils.py index 50aaa756f4..e23ca361a0 100644 --- a/tests/test_np_utils.py +++ b/tests/test_np_utils.py @@ -5,7 +5,6 @@ def test_memmap(): - e1 = sourmash.MinHash(n=1, ksize=20) sig1 = SourmashSignature(e1) diff --git a/tests/test_picklist.py b/tests/test_picklist.py index 73c8799689..682d6fb8af 100644 --- a/tests/test_picklist.py +++ b/tests/test_picklist.py @@ -14,23 +14,23 @@ def test_load_empty_picklist_fail(): - empty = utils.get_test_data('picklist/empty.csv') + empty = utils.get_test_data("picklist/empty.csv") - pl = SignaturePicklist('manifest', pickfile=empty) + pl = SignaturePicklist("manifest", pickfile=empty) with pytest.raises(ValueError): pl.load(allow_empty=False) def test_load_empty_picklist_allow(): - empty = utils.get_test_data('picklist/empty.csv') + empty = utils.get_test_data("picklist/empty.csv") - pl = SignaturePicklist('manifest', pickfile=empty) + pl = SignaturePicklist("manifest", pickfile=empty) pl.load(allow_empty=True) def test_dup_md5_picked(runtmp): # load a sig, duplicate, and see if a picklist gets the right one - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss = sourmash.load_file_as_signatures(sig47) sig = list(ss)[0] @@ -41,26 +41,26 @@ def test_dup_md5_picked(runtmp): print(ml.manifest.rows) assert len(ml.manifest) == 1 - mf_csv = runtmp.output('select.csv') + mf_csv = runtmp.output("select.csv") ml.manifest.write_to_filename(mf_csv) # now make an index to select against, with an identical signature # (but diff name) new_sig = sig.to_mutable() - new_sig.name = 'foo' + new_sig.name = "foo" xl = LinearIndex([sig, new_sig]) ml2 = MultiIndex.load([xl], [None], None) assert len(ml2) == 2 # create a picklist... 
- pl = SignaturePicklist('manifest', pickfile=mf_csv) + pl = SignaturePicklist("manifest", pickfile=mf_csv) print(pl.load()) - print('loaded:', len(pl.pickset)) + print("loaded:", len(pl.pickset)) # use in select ml3 = ml2.select(picklist=pl) - print('picked:', len(ml3)) + print("picked:", len(ml3)) assert len(pl.pickset) == len(ml3) @@ -68,7 +68,7 @@ def test_dup_md5_picked(runtmp): def test_dup_md5_picked_mf_to_picklist(runtmp): # load a sig, duplicate, and see if a picklist gets the right one # uses an in memory picklist - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss = sourmash.load_file_as_signatures(sig47) sig = list(ss)[0] @@ -84,7 +84,7 @@ def test_dup_md5_picked_mf_to_picklist(runtmp): # now make an index to select against, with an identical signature # (but diff name) new_sig = sig.to_mutable() - new_sig.name = 'foo' + new_sig.name = "foo" xl = LinearIndex([sig, new_sig]) ml2 = MultiIndex.load([xl], [None], None) @@ -92,7 +92,7 @@ def test_dup_md5_picked_mf_to_picklist(runtmp): # use picklist in select ml3 = ml2.select(picklist=pl) - print('picked:', len(ml3)) + print("picked:", len(ml3)) assert len(pl.pickset) == len(ml3) @@ -100,12 +100,12 @@ def test_dup_md5_picked_mf_to_picklist(runtmp): def test_dup_md5_picked_mf_to_picklist_sqlite(runtmp): # load a sig, duplicate, and see if a picklist gets the right one # use a sqlite db with its own to_picklist behavior. - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss = sourmash.load_file_as_signatures(sig47) sig = list(ss)[0] # save a manifest with one entry - xl = SqliteIndex.create(':memory:') + xl = SqliteIndex.create(":memory:") xl.insert(sig) print(xl.manifest.rows) @@ -116,7 +116,7 @@ def test_dup_md5_picked_mf_to_picklist_sqlite(runtmp): # now make an index to select against, with an identical signature # (but diff name) new_sig = sig.to_mutable() - new_sig.name = 'foo' + new_sig.name = "foo" xl = LinearIndex([sig, new_sig]) ml2 = MultiIndex.load([xl], [None], None) @@ -124,6 +124,6 @@ def test_dup_md5_picked_mf_to_picklist_sqlite(runtmp): # use picklist in select ml3 = ml2.select(picklist=pl) - print('picked:', len(ml3)) + print("picked:", len(ml3)) assert len(pl.pickset) == len(ml3) diff --git a/tests/test_plugin_framework.py b/tests/test_plugin_framework.py index 06156e4d85..1acb78bd6c 100644 --- a/tests/test_plugin_framework.py +++ b/tests/test_plugin_framework.py @@ -13,22 +13,23 @@ import sourmash_tst_utils as utils from sourmash import plugins from sourmash.index import LinearIndex -from sourmash.save_load import (Base_SaveSignaturesToLocation, - SaveSignaturesToLocation) +from sourmash.save_load import Base_SaveSignaturesToLocation, SaveSignaturesToLocation + + +_Dist = collections.namedtuple("_Dist", ["version"]) -_Dist = collections.namedtuple('_Dist', ['version']) class FakeEntryPoint: """ A class that stores a name and an object to be returned on 'load()'. Mocks the EntryPoint class used by importlib.metadata. """ - module = 'test_plugin_framework' - dist = _Dist('0.1') - group = 'groupfoo' - def __init__(self, name, load_obj, *, - error_on_import=None): + module = "test_plugin_framework" + dist = _Dist("0.1") + group = "groupfoo" + + def __init__(self, name, load_obj, *, error_on_import=None): self.name = name self.load_obj = load_obj self.error_on_import = error_on_import @@ -38,15 +39,17 @@ def load(self): raise self.error_on_import("as requested") return self.load_obj + # # Test basic features of the load_from plugin hook. 
# + class Test_EntryPointBasics_LoadFrom: def get_some_sigs(self, location, *args, **kwargs): - ss2 = utils.get_test_data('2.fa.sig') - ss47 = utils.get_test_data('47.fa.sig') - ss63 = utils.get_test_data('63.fa.sig') + ss2 = utils.get_test_data("2.fa.sig") + ss47 = utils.get_test_data("47.fa.sig") + ss63 = utils.get_test_data("63.fa.sig") sig2 = sourmash.load_one_signature(ss2, ksize=31) sig47 = sourmash.load_one_signature(ss47, ksize=31) @@ -55,12 +58,17 @@ def get_some_sigs(self, location, *args, **kwargs): lidx = LinearIndex([sig2, sig47, sig63], location) return lidx + get_some_sigs.priority = 1 - + def setup_method(self): self.saved_plugins = plugins._plugin_load_from - plugins._plugin_load_from = [FakeEntryPoint('test_load', self.get_some_sigs), - FakeEntryPoint('test_load', self.get_some_sigs, error_on_import=ModuleNotFoundError)] + plugins._plugin_load_from = [ + FakeEntryPoint("test_load", self.get_some_sigs), + FakeEntryPoint( + "test_load", self.get_some_sigs, error_on_import=ModuleNotFoundError + ), + ] def teardown_method(self): plugins._plugin_load_from = self.saved_plugins @@ -70,7 +78,7 @@ def test_load_1(self): assert len(ps) == 1 def test_load_2(self, runtmp): - fake_location = runtmp.output('passed-through location') + fake_location = runtmp.output("passed-through location") idx = sourmash.load_file_as_index(fake_location) print(idx, idx.location) @@ -80,9 +88,9 @@ def test_load_2(self, runtmp): class Test_EntryPoint_LoadFrom_Priority: def get_some_sigs(self, location, *args, **kwargs): - ss2 = utils.get_test_data('2.fa.sig') - ss47 = utils.get_test_data('47.fa.sig') - ss63 = utils.get_test_data('63.fa.sig') + ss2 = utils.get_test_data("2.fa.sig") + ss47 = utils.get_test_data("47.fa.sig") + ss63 = utils.get_test_data("63.fa.sig") sig2 = sourmash.load_one_signature(ss2, ksize=31) sig47 = sourmash.load_one_signature(ss47, ksize=31) @@ -91,39 +99,43 @@ def get_some_sigs(self, location, *args, **kwargs): lidx = LinearIndex([sig2, sig47, sig63], location) return lidx + get_some_sigs.priority = 5 def set_called_flag_1(self, location, *args, **kwargs): # high priority 1, raise ValueError - print('setting flag 1') + print("setting flag 1") self.was_called_flag_1 = True raise ValueError + set_called_flag_1.priority = 1 def set_called_flag_2(self, location, *args, **kwargs): # high priority 2, return None - print('setting flag 2') + print("setting flag 2") self.was_called_flag_2 = True return None + set_called_flag_2.priority = 2 def set_called_flag_3(self, location, *args, **kwargs): # lower priority 10, should not be called - print('setting flag 3') + print("setting flag 3") self.was_called_flag_3 = True return None + set_called_flag_3.priority = 10 def setup_method(self): self.saved_plugins = plugins._plugin_load_from plugins._plugin_load_from = [ - FakeEntryPoint('test_load', self.get_some_sigs), - FakeEntryPoint('test_load_2', self.set_called_flag_1), - FakeEntryPoint('test_load_3', self.set_called_flag_2), - FakeEntryPoint('test_load_4', self.set_called_flag_3) - ] + FakeEntryPoint("test_load", self.get_some_sigs), + FakeEntryPoint("test_load_2", self.set_called_flag_1), + FakeEntryPoint("test_load_3", self.set_called_flag_2), + FakeEntryPoint("test_load_4", self.set_called_flag_3), + ] self.was_called_flag_1 = False self.was_called_flag_2 = False self.was_called_flag_3 = False @@ -140,7 +152,7 @@ def test_load_1(self): assert not self.was_called_flag_3 def test_load_2(self, runtmp): - fake_location = runtmp.output('passed-through location') + fake_location = 
runtmp.output("passed-through location") idx = sourmash.load_file_as_index(fake_location) print(idx, idx.location) @@ -156,10 +168,12 @@ def test_load_2(self, runtmp): # Test basic features of the save_to plugin hook. # + class FakeSaveClass(Base_SaveSignaturesToLocation): """ A fake save class that just records what was sent to it. """ + priority = 50 def __init__(self, location): @@ -169,7 +183,7 @@ def __init__(self, location): @classmethod def matches(cls, location): if location: - return location.endswith('.this-is-a-test') + return location.endswith(".this-is-a-test") def add(self, ss): super().add(ss) @@ -184,8 +198,12 @@ class Test_EntryPointBasics_SaveTo: # test the basics def setup_method(self): self.saved_plugins = plugins._plugin_save_to - plugins._plugin_save_to = [FakeEntryPoint('test_save', FakeSaveClass), - FakeEntryPoint('test_save', FakeSaveClass, error_on_import=ModuleNotFoundError)] + plugins._plugin_save_to = [ + FakeEntryPoint("test_save", FakeSaveClass), + FakeEntryPoint( + "test_save", FakeSaveClass, error_on_import=ModuleNotFoundError + ), + ] def teardown_method(self): plugins._plugin_save_to = self.saved_plugins @@ -197,9 +215,9 @@ def test_save_1(self): def test_save_2(self, runtmp): # load some signatures to save - ss2 = utils.get_test_data('2.fa.sig') - ss47 = utils.get_test_data('47.fa.sig') - ss63 = utils.get_test_data('63.fa.sig') + ss2 = utils.get_test_data("2.fa.sig") + ss47 = utils.get_test_data("47.fa.sig") + ss63 = utils.get_test_data("63.fa.sig") sig2 = sourmash.load_one_signature(ss2, ksize=31) sig47 = sourmash.load_one_signature(ss47, ksize=31) @@ -207,7 +225,7 @@ def test_save_2(self, runtmp): # build a fake location that matches the FakeSaveClass # extension - fake_location = runtmp.output('out.this-is-a-test') + fake_location = runtmp.output("out.this-is-a-test") # this should use the plugin architecture to return an object # of type FakeSaveClass, with the three signatures in it. @@ -230,8 +248,8 @@ class Test_EntryPointPriority_SaveTo: def setup_method(self): self.saved_plugins = plugins._plugin_save_to plugins._plugin_save_to = [ - FakeEntryPoint('test_save', FakeSaveClass), - FakeEntryPoint('test_save2', FakeSaveClass_HighPriority), + FakeEntryPoint("test_save", FakeSaveClass), + FakeEntryPoint("test_save2", FakeSaveClass_HighPriority), ] def teardown_method(self): @@ -244,9 +262,9 @@ def test_save_1(self): def test_save_2(self, runtmp): # load some signatures to save - ss2 = utils.get_test_data('2.fa.sig') - ss47 = utils.get_test_data('47.fa.sig') - ss63 = utils.get_test_data('63.fa.sig') + ss2 = utils.get_test_data("2.fa.sig") + ss47 = utils.get_test_data("47.fa.sig") + ss63 = utils.get_test_data("63.fa.sig") sig2 = sourmash.load_one_signature(ss2, ksize=31) sig47 = sourmash.load_one_signature(ss47, ksize=31) @@ -254,7 +272,7 @@ def test_save_2(self, runtmp): # build a fake location that matches the FakeSaveClass # extension - fake_location = runtmp.output('out.this-is-a-test') + fake_location = runtmp.output("out.this-is-a-test") # this should use the plugin architecture to return an object # of type FakeSaveClass, with the three signatures in it. @@ -276,18 +294,20 @@ def test_save_2(self, runtmp): # Test basic features of the save_to plugin hook. # + class FakeCommandClass(plugins.CommandLinePlugin): """ A fake CLI class. 
""" - command = 'nifty' + + command = "nifty" description = "do somethin' nifty" def __init__(self, parser): super().__init__(parser) - parser.add_argument('arg1') - parser.add_argument('--other', action='store_true') - parser.add_argument('--do-fail', action='store_true') + parser.add_argument("arg1") + parser.add_argument("--other", action="store_true") + parser.add_argument("--do-fail", action="store_true") def main(self, args): super().main(args) @@ -305,8 +325,7 @@ def setup_method(self): _ = plugins.get_cli_script_plugins() self.saved_plugins = plugins._plugin_cli plugins._plugin_cli_once = False - plugins._plugin_cli = [FakeEntryPoint('test_command', - FakeCommandClass)] + plugins._plugin_cli = [FakeEntryPoint("test_command", FakeCommandClass)] def teardown_method(self): plugins._plugin_cli = self.saved_plugins @@ -316,17 +335,17 @@ def test_empty(self, runtmp): plugins._plugin_cli = [] with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts') + runtmp.sourmash("scripts") out = runtmp.last_result.out err = runtmp.last_result.err print(out) print(err) - assert '(No script plugins detected!)' in out + assert "(No script plugins detected!)" in out def test_cmd_0(self, runtmp): # test default output with some plugins with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts') + runtmp.sourmash("scripts") out = runtmp.last_result.out err = runtmp.last_result.err @@ -354,32 +373,32 @@ def test_cmd_2(self): def test_cmd_3(self, runtmp): # test ability to run 'nifty' ;) with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'nifty') + runtmp.sourmash("scripts", "nifty") out = runtmp.last_result.out err = runtmp.last_result.err print(out) print(err) - assert 'nifty: error: the following arguments are required: arg1' in err - assert 'usage: nifty [-h] [-q] [-d] [--other] [--do-fail] arg1' in err + assert "nifty: error: the following arguments are required: arg1" in err + assert "usage: nifty [-h] [-q] [-d] [--other] [--do-fail] arg1" in err def test_cmd_4(self, runtmp): # test basic argument parsing etc - runtmp.sourmash('scripts', 'nifty', '--other', 'some arg') + runtmp.sourmash("scripts", "nifty", "--other", "some arg") out = runtmp.last_result.out err = runtmp.last_result.err print(out) print(err) - assert 'other is True' in out - assert 'hello, world! argument is: some arg' in out + assert "other is True" in out + assert "hello, world! argument is: some arg" in out def test_cmd_5(self, runtmp): # test exit code passthru with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'nifty', '--do-fail', 'some arg') + runtmp.sourmash("scripts", "nifty", "--do-fail", "some arg") status = runtmp.last_result.status out = runtmp.last_result.out @@ -388,22 +407,23 @@ def test_cmd_5(self, runtmp): print(err) print(status) - assert 'other is False' in out - assert 'hello, world! argument is: some arg' in out + assert "other is False" in out + assert "hello, world! argument is: some arg" in out class FakeCommandClass_Second(plugins.CommandLinePlugin): """ A fake CLI class. 
""" - command = 'more_nifty' + + command = "more_nifty" description = "do somethin' else nifty" def __init__(self, parser): super().__init__(parser) - parser.add_argument('arg1') - parser.add_argument('--other', action='store_true') - parser.add_argument('--do-fail', action='store_true') + parser.add_argument("arg1") + parser.add_argument("--other", action="store_true") + parser.add_argument("--do-fail", action="store_true") def main(self, args): super().main(args) @@ -419,6 +439,7 @@ class FakeCommandClass_Broken_1: """ A fake CLI class. """ + # command = 'more_nifty' # no command def __init__(self, parser): @@ -432,7 +453,8 @@ class FakeCommandClass_Broken_2: """ A fake CLI class. """ - command = 'broken' + + command = "broken" # no description def __init__(self, parser): @@ -448,18 +470,15 @@ def setup_method(self): _ = plugins.get_cli_script_plugins() self.saved_plugins = plugins._plugin_cli plugins._plugin_cli_once = False - plugins._plugin_cli = [FakeEntryPoint('test_command', - FakeCommandClass), - FakeEntryPoint('test_command2', - FakeCommandClass_Second), - FakeEntryPoint('test_command3', - FakeCommandClass_Broken_1), - FakeEntryPoint('test_command4', - FakeCommandClass_Broken_2), - FakeEntryPoint('error-on-import', - FakeCommandClass, - error_on_import=ModuleNotFoundError) - ] + plugins._plugin_cli = [ + FakeEntryPoint("test_command", FakeCommandClass), + FakeEntryPoint("test_command2", FakeCommandClass_Second), + FakeEntryPoint("test_command3", FakeCommandClass_Broken_1), + FakeEntryPoint("test_command4", FakeCommandClass_Broken_2), + FakeEntryPoint( + "error-on-import", FakeCommandClass, error_on_import=ModuleNotFoundError + ), + ] def teardown_method(self): plugins._plugin_cli = self.saved_plugins @@ -467,7 +486,7 @@ def teardown_method(self): def test_cmd_0(self, runtmp): # test default output for a few plugins with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts') + runtmp.sourmash("scripts") out = runtmp.last_result.out err = runtmp.last_result.err @@ -481,7 +500,7 @@ def test_cmd_0(self, runtmp): def test_cmd_1(self, runtmp): # test 'nifty' - runtmp.sourmash('scripts', 'nifty', 'some arg') + runtmp.sourmash("scripts", "nifty", "some arg") status = runtmp.last_result.status out = runtmp.last_result.out @@ -490,12 +509,12 @@ def test_cmd_1(self, runtmp): print(err) print(status) - assert 'other is False' in out - assert 'hello, world! argument is: some arg' in out + assert "other is False" in out + assert "hello, world! argument is: some arg" in out def test_cmd_2(self, runtmp): # test 'more_nifty' - runtmp.sourmash('scripts', 'more_nifty', 'some arg') + runtmp.sourmash("scripts", "more_nifty", "some arg") status = runtmp.last_result.status out = runtmp.last_result.out @@ -504,12 +523,12 @@ def test_cmd_2(self, runtmp): print(err) print(status) - assert 'other is False' in out - assert 'hello, world! argument is: some arg' in out + assert "other is False" in out + assert "hello, world! 
argument is: some arg" in out def test_sourmash_info(self, runtmp): # test 'sourmash info -v' => shows the plugins - runtmp.sourmash('info', '-v') + runtmp.sourmash("info", "-v") out = runtmp.last_result.out err = runtmp.last_result.err diff --git a/tests/test_prefetch.py b/tests/test_prefetch.py index 7ab2d2c1dd..44c6b4aac5 100644 --- a/tests/test_prefetch.py +++ b/tests/test_prefetch.py @@ -25,29 +25,40 @@ def test_prefetch_basic(runtmp, linear_gather): c = runtmp # test a basic prefetch - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - linear_gather) + c.run_sourmash("prefetch", "-k", "31", sig47, sig63, sig2, sig47, linear_gather) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert "WARNING: no output(s) specified! Nothing will be saved from this prefetch!" in c.last_result.err + assert ( + "WARNING: no output(s) specified! Nothing will be saved from this prefetch!" + in c.last_result.err + ) assert "selecting specified query k=31" in c.last_result.err - assert "loaded query: NC_009665.1 Shewanella baltica... (k=31, DNA)" in c.last_result.err - assert "query sketch has scaled=1000; will be dynamically downsampled as needed" in c.last_result.err + assert ( + "loaded query: NC_009665.1 Shewanella baltica... (k=31, DNA)" + in c.last_result.err + ) + assert ( + "query sketch has scaled=1000; will be dynamically downsampled as needed" + in c.last_result.err + ) err = c.last_result.err assert "loaded 5 total signatures from 3 locations." in err assert "after selecting signatures compatible with search, 3 remain." in err assert "total of 2 matching signatures." in c.last_result.err - assert "of 5177 distinct query hashes, 5177 were found in matches above threshold." in c.last_result.err + assert ( + "of 5177 distinct query hashes, 5177 were found in matches above threshold." + in c.last_result.err + ) assert "a total of 0 query hashes remain unmatched." in c.last_result.err @@ -55,15 +66,18 @@ def test_prefetch_select_query_ksize(runtmp, linear_gather): # test prefetch where query and subject db both have multiple ksizes c = runtmp - ss = utils.get_test_data('GCF_000005845.2_ASM584v2_genomic.fna.gz.sig') + ss = utils.get_test_data("GCF_000005845.2_ASM584v2_genomic.fna.gz.sig") - c.run_sourmash('prefetch', ss, ss, linear_gather) + c.run_sourmash("prefetch", ss, ss, linear_gather) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert 'of 4476 distinct query hashes, 4476 were found in matches above threshold.' in c.last_result.err + assert ( + "of 4476 distinct query hashes, 4476 were found in matches above threshold." 
+ in c.last_result.err + ) def test_prefetch_subject_scaled_is_larger(runtmp, linear_gather): @@ -71,26 +85,39 @@ def test_prefetch_subject_scaled_is_larger(runtmp, linear_gather): c = runtmp # make a query sketch with scaled=1000 - fa = utils.get_test_data('genome-s10.fa.gz') - c.run_sourmash('sketch', 'dna', fa, '-o', 'query.sig') - assert os.path.exists(runtmp.output('query.sig')) + fa = utils.get_test_data("genome-s10.fa.gz") + c.run_sourmash("sketch", "dna", fa, "-o", "query.sig") + assert os.path.exists(runtmp.output("query.sig")) # this has a scaled of 10000, from same genome: - against1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - against2 = utils.get_test_data('scaled/all.sbt.zip') - against3 = utils.get_test_data('scaled/all.lca.json') + against1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + against2 = utils.get_test_data("scaled/all.sbt.zip") + against3 = utils.get_test_data("scaled/all.lca.json") # run against large scaled, then small (self) - c.run_sourmash('prefetch', 'query.sig', against1, against2, against3, - 'query.sig', linear_gather) + c.run_sourmash( + "prefetch", + "query.sig", + against1, + against2, + against3, + "query.sig", + linear_gather, + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert 'total of 8 matching signatures.' in c.last_result.err - assert 'of 48 distinct query hashes, 48 were found in matches above threshold.' in c.last_result.err - assert 'final scaled value (max across query and all matches) is 10000' in c.last_result.err + assert "total of 8 matching signatures." in c.last_result.err + assert ( + "of 48 distinct query hashes, 48 were found in matches above threshold." + in c.last_result.err + ) + assert ( + "final scaled value (max across query and all matches) is 10000" + in c.last_result.err + ) def test_prefetch_subject_scaled_is_larger_outsigs(runtmp, linear_gather): @@ -98,30 +125,45 @@ def test_prefetch_subject_scaled_is_larger_outsigs(runtmp, linear_gather): c = runtmp # make a query sketch with scaled=1000 - fa = utils.get_test_data('genome-s10.fa.gz') - c.run_sourmash('sketch', 'dna', fa, '-o', 'query.sig') - assert os.path.exists(runtmp.output('query.sig')) + fa = utils.get_test_data("genome-s10.fa.gz") + c.run_sourmash("sketch", "dna", fa, "-o", "query.sig") + assert os.path.exists(runtmp.output("query.sig")) # this has a scaled of 10000, from same genome: - against1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - against2 = utils.get_test_data('scaled/all.sbt.zip') - against3 = utils.get_test_data('scaled/all.lca.json') + against1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + against2 = utils.get_test_data("scaled/all.sbt.zip") + against3 = utils.get_test_data("scaled/all.lca.json") # run against large scaled, then small (self) - c.run_sourmash('prefetch', 'query.sig', against1, against2, against3, - 'query.sig', linear_gather, '--save-matches', 'matches.sig') + c.run_sourmash( + "prefetch", + "query.sig", + against1, + against2, + against3, + "query.sig", + linear_gather, + "--save-matches", + "matches.sig", + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert 'total of 8 matching signatures.' in c.last_result.err - assert 'of 48 distinct query hashes, 48 were found in matches above threshold.' in c.last_result.err - assert 'final scaled value (max across query and all matches) is 10000' in c.last_result.err + assert "total of 8 matching signatures." 
in c.last_result.err + assert ( + "of 48 distinct query hashes, 48 were found in matches above threshold." + in c.last_result.err + ) + assert ( + "final scaled value (max across query and all matches) is 10000" + in c.last_result.err + ) # make sure non-downsampled sketches were saved. - matches = sourmash.load_file_as_signatures(runtmp.output('matches.sig')) - scaled_vals = set([ match.minhash.scaled for match in matches ]) + matches = sourmash.load_file_as_signatures(runtmp.output("matches.sig")) + scaled_vals = set([match.minhash.scaled for match in matches]) assert 1000 in scaled_vals assert 10000 in scaled_vals assert len(scaled_vals) == 2 @@ -131,25 +173,36 @@ def test_prefetch_query_abund(runtmp, linear_gather): c = runtmp # test a basic prefetch w/abund query - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - linear_gather) + c.run_sourmash("prefetch", "-k", "31", sig47, sig63, sig2, sig47, linear_gather) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert "WARNING: no output(s) specified! Nothing will be saved from this prefetch!" in c.last_result.err + assert ( + "WARNING: no output(s) specified! Nothing will be saved from this prefetch!" + in c.last_result.err + ) assert "selecting specified query k=31" in c.last_result.err - assert "loaded query: NC_009665.1 Shewanella baltica... (k=31, DNA)" in c.last_result.err - assert "query sketch has scaled=1000; will be dynamically downsampled as needed" in c.last_result.err + assert ( + "loaded query: NC_009665.1 Shewanella baltica... (k=31, DNA)" + in c.last_result.err + ) + assert ( + "query sketch has scaled=1000; will be dynamically downsampled as needed" + in c.last_result.err + ) assert "total of 2 matching signatures." in c.last_result.err - assert "of 5177 distinct query hashes, 5177 were found in matches above threshold." in c.last_result.err + assert ( + "of 5177 distinct query hashes, 5177 were found in matches above threshold." + in c.last_result.err + ) assert "a total of 0 query hashes remain unmatched." in c.last_result.err @@ -157,25 +210,36 @@ def test_prefetch_subj_abund(runtmp, linear_gather): c = runtmp # test a basic prefetch w/abund signature. - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - linear_gather) + c.run_sourmash("prefetch", "-k", "31", sig47, sig63, sig2, sig47, linear_gather) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert "WARNING: no output(s) specified! Nothing will be saved from this prefetch!" in c.last_result.err + assert ( + "WARNING: no output(s) specified! Nothing will be saved from this prefetch!" + in c.last_result.err + ) assert "selecting specified query k=31" in c.last_result.err - assert "loaded query: NC_009665.1 Shewanella baltica... 
(k=31, DNA)" in c.last_result.err - assert "query sketch has scaled=1000; will be dynamically downsampled as needed" in c.last_result.err + assert ( + "loaded query: NC_009665.1 Shewanella baltica... (k=31, DNA)" + in c.last_result.err + ) + assert ( + "query sketch has scaled=1000; will be dynamically downsampled as needed" + in c.last_result.err + ) assert "total of 2 matching signatures." in c.last_result.err - assert "of 5177 distinct query hashes, 5177 were found in matches above threshold." in c.last_result.err + assert ( + "of 5177 distinct query hashes, 5177 were found in matches above threshold." + in c.last_result.err + ) assert "a total of 0 query hashes remain unmatched." in c.last_result.err @@ -183,14 +247,15 @@ def test_prefetch_csv_out(runtmp, linear_gather): c = runtmp # test a basic prefetch, with CSV output - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") - csvout = c.output('out.csv') + csvout = c.output("out.csv") - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - '-o', csvout, linear_gather) + c.run_sourmash( + "prefetch", "-k", "31", sig47, sig63, sig2, sig47, "-o", csvout, linear_gather + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) @@ -199,25 +264,26 @@ def test_prefetch_csv_out(runtmp, linear_gather): assert os.path.exists(csvout) expected_intersect_bp = [2529000, 5177000] - with open(csvout, 'rt', newline="") as fp: + with open(csvout, newline="") as fp: r = csv.DictReader(fp) - for (row, expected) in zip(r, expected_intersect_bp): + for row, expected in zip(r, expected_intersect_bp): print(row) - assert int(row['intersect_bp']) == expected + assert int(row["intersect_bp"]) == expected def test_prefetch_csv_gz_out(runtmp, linear_gather): c = runtmp # test a basic prefetch, with CSV output to a .gz file - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") - csvout = c.output('out.csv.gz') + csvout = c.output("out.csv.gz") - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - '-o', csvout, linear_gather) + c.run_sourmash( + "prefetch", "-k", "31", sig47, sig63, sig2, sig47, "-o", csvout, linear_gather + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) @@ -226,25 +292,35 @@ def test_prefetch_csv_gz_out(runtmp, linear_gather): assert os.path.exists(csvout) expected_intersect_bp = [2529000, 5177000] - with gzip.open(csvout, 'rt', newline="") as fp: + with gzip.open(csvout, "rt", newline="") as fp: r = csv.DictReader(fp) - for (row, expected) in zip(r, expected_intersect_bp): + for row, expected in zip(r, expected_intersect_bp): print(row) - assert int(row['intersect_bp']) == expected + assert int(row["intersect_bp"]) == expected def test_prefetch_matches(runtmp, linear_gather): c = runtmp # test a basic prefetch, with --save-matches - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - - matches_out = c.output('matches.sig') - - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - '--save-matches', matches_out, linear_gather) + sig2 = utils.get_test_data("2.fa.sig") + sig47 = 
utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + + matches_out = c.output("matches.sig") + + c.run_sourmash( + "prefetch", + "-k", + "31", + sig47, + sig63, + sig2, + sig47, + "--save-matches", + matches_out, + linear_gather, + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) @@ -255,7 +331,7 @@ def test_prefetch_matches(runtmp, linear_gather): sigs = sourmash.load_file_as_index(matches_out) expected_matches = [sig63, sig47] - for (match, expected) in zip(sigs.signatures(), expected_matches): + for match, expected in zip(sigs.signatures(), expected_matches): ss = sourmash.load_one_signature(expected, ksize=31) assert match == ss @@ -264,16 +340,26 @@ def test_prefetch_matches_to_dir(runtmp, linear_gather): c = runtmp # test a basic prefetch, with --save-matches to a directory - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss63 = sourmash.load_one_signature(sig63) ss47 = sourmash.load_one_signature(sig47) - matches_out = c.output('matches_dir/') - - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - '--save-matches', matches_out, linear_gather) + matches_out = c.output("matches_dir/") + + c.run_sourmash( + "prefetch", + "-k", + "31", + sig47, + sig63, + sig2, + sig47, + "--save-matches", + matches_out, + linear_gather, + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) @@ -296,16 +382,26 @@ def test_prefetch_matches_to_sig_gz(runtmp, linear_gather): import gzip # test a basic prefetch, with --save-matches to a sig.gz file - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss63 = sourmash.load_one_signature(sig63) ss47 = sourmash.load_one_signature(sig47) - matches_out = c.output('matches.sig.gz') - - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - '--save-matches', matches_out, linear_gather) + matches_out = c.output("matches.sig.gz") + + c.run_sourmash( + "prefetch", + "-k", + "31", + sig47, + sig63, + sig2, + sig47, + "--save-matches", + matches_out, + linear_gather, + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) @@ -332,16 +428,26 @@ def test_prefetch_matches_to_zip(runtmp, linear_gather): # test a basic prefetch, with --save-matches to a zipfile import zipfile - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss63 = sourmash.load_one_signature(sig63) ss47 = sourmash.load_one_signature(sig47) - matches_out = c.output('matches.zip') - - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - '--save-matches', matches_out, linear_gather) + matches_out = c.output("matches.zip") + + c.run_sourmash( + "prefetch", + "-k", + "31", + sig47, + sig63, + sig2, + sig47, + "--save-matches", + matches_out, + linear_gather, + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) @@ -367,14 +473,22 @@ def test_prefetch_matching_hashes(runtmp, linear_gather): c = runtmp # test a basic prefetch, with 
--save-matching-hashes
-    sig2 = utils.get_test_data('2.fa.sig')
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
-
-    matches_out = c.output('matches.sig')
-
-    c.run_sourmash('prefetch', '-k', '31', sig47, sig63,
-                   '--save-matching-hashes', matches_out, linear_gather)
+    utils.get_test_data("2.fa.sig")
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")
+
+    matches_out = c.output("matches.sig")
+
+    c.run_sourmash(
+        "prefetch",
+        "-k",
+        "31",
+        sig47,
+        sig63,
+        "--save-matching-hashes",
+        matches_out,
+        linear_gather,
+    )
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

@@ -390,7 +504,7 @@ def test_prefetch_matching_hashes(runtmp, linear_gather):
     intersect.add_many(matches)

     ss = sourmash.load_one_signature(matches_out)
-    assert ss.name.endswith('-known')
+    assert ss.name.endswith("-known")
     assert ss.minhash == intersect


@@ -398,14 +512,23 @@ def test_prefetch_nomatch_hashes(runtmp, linear_gather):
     c = runtmp

     # test a basic prefetch, with --save-unmatched-hashes
-    sig2 = utils.get_test_data('2.fa.sig')
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
-
-    nomatch_out = c.output('unmatched_hashes.sig')
-
-    c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2,
-                   '--save-unmatched-hashes', nomatch_out, linear_gather)
+    sig2 = utils.get_test_data("2.fa.sig")
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")
+
+    nomatch_out = c.output("unmatched_hashes.sig")
+
+    c.run_sourmash(
+        "prefetch",
+        "-k",
+        "31",
+        sig47,
+        sig63,
+        sig2,
+        "--save-unmatched-hashes",
+        nomatch_out,
+        linear_gather,
+    )
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

@@ -420,7 +543,7 @@ def test_prefetch_nomatch_hashes(runtmp, linear_gather):
     remain.remove_many(ss63.minhash.hashes)

     ss = sourmash.load_one_signature(nomatch_out)
-    assert ss.name.endswith('-unknown')
+    assert ss.name.endswith("-unknown")
     assert ss.minhash == remain


@@ -428,12 +551,11 @@ def test_prefetch_no_num_query(runtmp, linear_gather):
     c = runtmp

     # can't do prefetch with num signatures for query
-    sig47 = utils.get_test_data('num/47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig47 = utils.get_test_data("num/47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig47,
-                       linear_gather)
+        c.run_sourmash("prefetch", "-k", "31", sig47, sig63, sig47, linear_gather)

     print(c.last_result.status)
     print(c.last_result.out)

@@ -446,50 +568,66 @@ def test_prefetch_no_num_subj(runtmp, linear_gather):
     c = runtmp

     # can't do prefetch with num signatures for the subject; no matches!
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('num/63.fa.sig')
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("num/63.fa.sig")

     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('prefetch', '-k', '31', sig47, sig63, linear_gather)
+        c.run_sourmash("prefetch", "-k", "31", sig47, sig63, linear_gather)

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status != 0
-    assert "ERROR in prefetch: after picklists and patterns, no signatures to search!?" in c.last_result.err
+    assert (
+        "ERROR in prefetch: after picklists and patterns, no signatures to search!?"
+ in c.last_result.err + ) def test_prefetch_db_fromfile(runtmp, linear_gather): c = runtmp # test a basic prefetch - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") - from_file = c.output('from-list.txt') + from_file = c.output("from-list.txt") - with open(from_file, 'wt') as fp: + with open(from_file, "w") as fp: print(sig63, file=fp) print(sig2, file=fp) print(sig47, file=fp) - c.run_sourmash('prefetch', '-k', '31', sig47, linear_gather, - '--db-from-file', from_file) + c.run_sourmash( + "prefetch", "-k", "31", sig47, linear_gather, "--db-from-file", from_file + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert "WARNING: no output(s) specified! Nothing will be saved from this prefetch!" in c.last_result.err + assert ( + "WARNING: no output(s) specified! Nothing will be saved from this prefetch!" + in c.last_result.err + ) assert "selecting specified query k=31" in c.last_result.err - assert "loaded query: NC_009665.1 Shewanella baltica... (k=31, DNA)" in c.last_result.err - assert "query sketch has scaled=1000; will be dynamically downsampled as needed" in c.last_result.err + assert ( + "loaded query: NC_009665.1 Shewanella baltica... (k=31, DNA)" + in c.last_result.err + ) + assert ( + "query sketch has scaled=1000; will be dynamically downsampled as needed" + in c.last_result.err + ) assert "total of 2 matching signatures." in c.last_result.err - assert "of 5177 distinct query hashes, 5177 were found in matches above threshold." in c.last_result.err + assert ( + "of 5177 distinct query hashes, 5177 were found in matches above threshold." + in c.last_result.err + ) assert "a total of 0 query hashes remain unmatched." 
in c.last_result.err @@ -497,10 +635,10 @@ def test_prefetch_no_db(runtmp, linear_gather): c = runtmp # test a basic prefetch with no databases/signatures - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") with pytest.raises(SourmashCommandFailed): - c.run_sourmash('prefetch', '-k', '31', sig47, linear_gather) + c.run_sourmash("prefetch", "-k", "31", sig47, linear_gather) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) @@ -512,13 +650,23 @@ def test_prefetch_no_db(runtmp, linear_gather): def test_prefetch_check_scaled_bounds_negative(runtmp, linear_gather): c = runtmp - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - '--scaled', '-5', linear_gather) + c.run_sourmash( + "prefetch", + "-k", + "31", + sig47, + sig63, + sig2, + sig47, + "--scaled", + "-5", + linear_gather, + ) assert "ERROR: scaled value must be positive" in str(exc.value) @@ -526,41 +674,75 @@ def test_prefetch_check_scaled_bounds_negative(runtmp, linear_gather): def test_prefetch_check_scaled_bounds_less_than_minimum(runtmp, linear_gather): c = runtmp - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - '--scaled', '50', linear_gather) - - assert "WARNING: scaled value should be >= 100. Continuing anyway." in str(exc.value) + c.run_sourmash( + "prefetch", + "-k", + "31", + sig47, + sig63, + sig2, + sig47, + "--scaled", + "50", + linear_gather, + ) + + assert "WARNING: scaled value should be >= 100. Continuing anyway." in str( + exc.value + ) def test_prefetch_check_scaled_bounds_more_than_maximum(runtmp, linear_gather): c = runtmp - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - '--scaled', '1e9', linear_gather) - - assert "WARNING: scaled value should be <= 1e6. Continuing anyway." in str(exc.value) + c.run_sourmash( + "prefetch", + "-k", + "31", + sig47, + sig63, + sig2, + sig47, + "--scaled", + "1e9", + linear_gather, + ) + + assert "WARNING: scaled value should be <= 1e6. Continuing anyway." 
in str(
+        exc.value
+    )


 def test_prefetch_downsample_scaled(runtmp, linear_gather):
     c = runtmp

     # test --scaled
-    sig2 = utils.get_test_data('2.fa.sig')
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
-
-    c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47,
-                   '--scaled', '1e5', linear_gather)
+    sig2 = utils.get_test_data("2.fa.sig")
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")
+
+    c.run_sourmash(
+        "prefetch",
+        "-k",
+        "31",
+        sig47,
+        sig63,
+        sig2,
+        sig47,
+        "--scaled",
+        "1e5",
+        linear_gather,
+    )
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

@@ -569,21 +751,19 @@ def test_prefetch_downsample_scaled(runtmp, linear_gather):

     assert "downsampling query from scaled=1000 to 10000" in c.last_result.err

-
-
 def test_prefetch_downsample_multiple(runtmp, linear_gather):
     # test multiple different downsamplings in prefetch code
-    query_sig = utils.get_test_data('GCF_000006945.2-s500.sig')
+    query_sig = utils.get_test_data("GCF_000006945.2-s500.sig")

     # load in the hashes and split them into four bins, randomly.
     ss = sourmash.load_one_signature(query_sig)
     hashes = list(ss.minhash.hashes)

-    random.seed(a=1) # fix seed so test is reproducible
+    random.seed(a=1)  # fix seed so test is reproducible
     random.shuffle(hashes)

     # split into 4 bins:
-    mh_bins = [ ss.minhash.copy_and_clear() for i in range(4) ]
+    mh_bins = [ss.minhash.copy_and_clear() for i in range(4)]
     for i, hashval in enumerate(hashes):
         mh_bins[i % 4].add_hash(hashval)

@@ -602,25 +782,38 @@ def test_prefetch_downsample_multiple(runtmp, linear_gather):
         gathersigs.append(f"bin{i}.sig")

-    runtmp.sourmash('prefetch', linear_gather, query_sig, *gathersigs)
+    runtmp.sourmash("prefetch", linear_gather, query_sig, *gathersigs)

     print(runtmp.last_result.out)
     print(runtmp.last_result.err)

-    assert "final scaled value (max across query and all matches) is 1000" in runtmp.last_result.err
+    assert (
+        "final scaled value (max across query and all matches) is 1000"
+        in runtmp.last_result.err
+    )


 def test_prefetch_empty(runtmp, linear_gather):
     c = runtmp

     # test --scaled
-    sig2 = utils.get_test_data('2.fa.sig')
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig2 = utils.get_test_data("2.fa.sig")
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47,
-                       '--scaled', '1e9', linear_gather)
+        c.run_sourmash(
+            "prefetch",
+            "-k",
+            "31",
+            sig47,
+            sig63,
+            sig2,
+            sig47,
+            "--scaled",
+            "1e9",
+            linear_gather,
+        )
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

@@ -633,13 +826,13 @@ def test_prefetch_basic_many_sigs(runtmp, linear_gather):
     c = runtmp

     # test what happens with many (and duplicate) signatures
-    sig2 = utils.get_test_data('2.fa.sig')
-    sig47 = utils.get_test_data('47.fa.sig')
-    sig63 = utils.get_test_data('63.fa.sig')
+    sig2 = utils.get_test_data("2.fa.sig")
+    sig47 = utils.get_test_data("47.fa.sig")
+    sig63 = utils.get_test_data("63.fa.sig")

     manysigs = [sig63, sig2, sig47] * 5

-    c.run_sourmash('prefetch', '-k', '31', sig47, *manysigs, linear_gather)
+    c.run_sourmash("prefetch", "-k", "31", sig47, *manysigs, linear_gather)
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
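The downsampling tests above (test_prefetch_downsample_scaled / _multiple) exercise prefetch's resolution rule: sketches are compared at the largest, i.e. coarsest, scaled value across the query and all matches. A minimal sketch of that rule via sourmash's Python API; the file names here are placeholders, not test data:

    import sourmash

    # load a query and a match that may carry different scaled values
    query = sourmash.load_one_signature("query.sig", ksize=31)
    match = sourmash.load_one_signature("match.sig", ksize=31)

    # prefetch-style rule: downsample both sketches to the max scaled seen,
    # then compare at that common resolution
    common = max(query.minhash.scaled, match.minhash.scaled)
    q_mh = query.minhash.downsample(scaled=common)
    m_mh = match.minhash.downsample(scaled=common)
    print(q_mh.contained_by(m_mh))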
assert "total of 10 matching signatures so far." in c.last_result.err assert "total of 10 matching signatures." in c.last_result.err - assert "of 5177 distinct query hashes, 5177 were found in matches above threshold." in c.last_result.err + assert ( + "of 5177 distinct query hashes, 5177 were found in matches above threshold." + in c.last_result.err + ) assert "a total of 0 query hashes remain unmatched." in c.last_result.err def test_prefetch_with_picklist(runtmp): # test 'sourmash prefetch' with picklists - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") - runtmp.sourmash('prefetch', metag_sig, *gcf_sigs, - '--picklist', f"{picklist}:md5:md5") + runtmp.sourmash( + "prefetch", metag_sig, *gcf_sigs, "--picklist", f"{picklist}:md5:md5" + ) err = runtmp.last_result.err print(err) @@ -670,18 +867,22 @@ def test_prefetch_with_picklist(runtmp): print(out) assert "total of 3 matching signatures." in err - assert "of 1466 distinct query hashes, 453 were found in matches above threshold." in err + assert ( + "of 1466 distinct query hashes, 453 were found in matches above threshold." + in err + ) assert "a total of 1013 query hashes remain unmatched." in err def test_prefetch_with_picklist_exclude(runtmp): # test 'sourmash prefetch' with picklists, exclude - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") - runtmp.sourmash('prefetch', metag_sig, *gcf_sigs, - '--picklist', f"{picklist}:md5:md5:exclude") + runtmp.sourmash( + "prefetch", metag_sig, *gcf_sigs, "--picklist", f"{picklist}:md5:md5:exclude" + ) err = runtmp.last_result.err print(err) @@ -692,17 +893,19 @@ def test_prefetch_with_picklist_exclude(runtmp): print(out) assert "total of 9 matching signatures." in err - assert "of 1466 distinct query hashes, 1013 were found in matches above threshold." in err + assert ( + "of 1466 distinct query hashes, 1013 were found in matches above threshold." + in err + ) assert "a total of 453 query hashes remain unmatched." in err def test_prefetch_with_pattern_include(runtmp): # test 'sourmash prefetch' with --include-db-pattern - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") - runtmp.sourmash('prefetch', metag_sig, *gcf_sigs, - '--include', 'thermotoga') + runtmp.sourmash("prefetch", metag_sig, *gcf_sigs, "--include", "thermotoga") err = runtmp.last_result.err print(err) @@ -711,17 +914,19 @@ def test_prefetch_with_pattern_include(runtmp): print(out) assert "total of 3 matching signatures." in err - assert "of 1466 distinct query hashes, 453 were found in matches above threshold." in err + assert ( + "of 1466 distinct query hashes, 453 were found in matches above threshold." + in err + ) assert "a total of 1013 query hashes remain unmatched." 
in err def test_prefetch_with_pattern_exclude(runtmp): # test 'sourmash prefetch' with --exclude-db-pattern - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") - runtmp.sourmash('prefetch', metag_sig, *gcf_sigs, - '--exclude', 'thermotoga') + runtmp.sourmash("prefetch", metag_sig, *gcf_sigs, "--exclude", "thermotoga") err = runtmp.last_result.err print(err) @@ -730,27 +935,37 @@ def test_prefetch_with_pattern_exclude(runtmp): print(out) assert "total of 9 matching signatures." in err - assert "of 1466 distinct query hashes, 1013 were found in matches above threshold." in err + assert ( + "of 1466 distinct query hashes, 1013 were found in matches above threshold." + in err + ) assert "a total of 453 query hashes remain unmatched." in err def test_prefetch_output_with_abundance(runtmp, prefetch_gather, linear_gather): c = runtmp - query = utils.get_test_data('gather-abund/reads-s10x10-s11.sig') - against = utils.get_test_data('gather-abund/genome-s10.fa.gz.sig') - - c.run_sourmash('prefetch', linear_gather, query, against, - '--save-matching-hashes', c.output('match-hash.sig'), - '--save-unmatched-hashes', c.output('nomatch-hash.sig')) + query = utils.get_test_data("gather-abund/reads-s10x10-s11.sig") + against = utils.get_test_data("gather-abund/genome-s10.fa.gz.sig") + + c.run_sourmash( + "prefetch", + linear_gather, + query, + against, + "--save-matching-hashes", + c.output("match-hash.sig"), + "--save-unmatched-hashes", + c.output("nomatch-hash.sig"), + ) print(c.last_result.out) - assert os.path.exists(c.output('match-hash.sig')) - ss = list(sourmash.load_file_as_signatures(c.output('match-hash.sig')))[0] + assert os.path.exists(c.output("match-hash.sig")) + ss = list(sourmash.load_file_as_signatures(c.output("match-hash.sig")))[0] assert ss.minhash.track_abundance - assert os.path.exists(c.output('nomatch-hash.sig')) - ss = list(sourmash.load_file_as_signatures(c.output('nomatch-hash.sig')))[0] + assert os.path.exists(c.output("nomatch-hash.sig")) + ss = list(sourmash.load_file_as_signatures(c.output("nomatch-hash.sig")))[0] assert ss.minhash.track_abundance @@ -758,14 +973,15 @@ def test_prefetch_ani_csv_out(runtmp, linear_gather): c = runtmp # test a basic prefetch, with CSV output - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") - csvout = c.output('out.csv') + csvout = c.output("out.csv") - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - '-o', csvout, linear_gather) + c.run_sourmash( + "prefetch", "-k", "31", sig47, sig63, sig2, sig47, "-o", csvout, linear_gather + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) @@ -774,37 +990,56 @@ def test_prefetch_ani_csv_out(runtmp, linear_gather): assert os.path.exists(csvout) prefetch_result_names = PrefetchResult.prefetch_write_cols - exp1 = {'q_ani': '0.9771552502238963','m_ani': '0.9767860811200507', - 'ac_ani': '0.9769706656719734','mc_ani': '0.9771552502238963', - 'pfn': 'False'} - exp2 = {'q_ani': '1.0','m_ani': '1.0', - 'ac_ani': '1.0','mc_ani': '1.0', - 'pfn': 'False'} + exp1 = { + "q_ani": "0.9771552502238963", + "m_ani": "0.9767860811200507", + "ac_ani": "0.9769706656719734", + "mc_ani": 
"0.9771552502238963", + "pfn": "False", + } + exp2 = { + "q_ani": "1.0", + "m_ani": "1.0", + "ac_ani": "1.0", + "mc_ani": "1.0", + "pfn": "False", + } expected_ani_vals = [exp1, exp2] - with open(csvout, 'rt', newline="") as fp: + with open(csvout, newline="") as fp: r = csv.DictReader(fp) - for (row, expected) in zip(r, expected_ani_vals): + for row, expected in zip(r, expected_ani_vals): print(row) assert prefetch_result_names == list(row.keys()) - assert approx_eq(row['query_containment_ani'], expected['q_ani']) - assert approx_eq(row['match_containment_ani'], expected['m_ani']) - assert approx_eq(row['max_containment_ani'], expected['mc_ani']) - assert approx_eq(row['average_containment_ani'], expected['ac_ani']) - assert row['potential_false_negative'] == expected['pfn'] + assert approx_eq(row["query_containment_ani"], expected["q_ani"]) + assert approx_eq(row["match_containment_ani"], expected["m_ani"]) + assert approx_eq(row["max_containment_ani"], expected["mc_ani"]) + assert approx_eq(row["average_containment_ani"], expected["ac_ani"]) + assert row["potential_false_negative"] == expected["pfn"] def test_prefetch_ani_csv_out_estimate_ci(runtmp, linear_gather): c = runtmp # test a basic prefetch, with CSV output - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') - - csvout = c.output('out.csv') - - c.run_sourmash('prefetch', '-k', '31', sig47, sig63, sig2, sig47, - '-o', csvout, linear_gather, '--estimate-ani-ci') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + + csvout = c.output("out.csv") + + c.run_sourmash( + "prefetch", + "-k", + "31", + sig47, + sig63, + sig2, + sig47, + "-o", + csvout, + linear_gather, + "--estimate-ani-ci", + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) @@ -813,54 +1048,64 @@ def test_prefetch_ani_csv_out_estimate_ci(runtmp, linear_gather): assert os.path.exists(csvout) prefetch_result_names_ci = PrefetchResult.prefetch_write_cols_ci - exp1 = {'q_ani': '0.9771552502238963','m_ani': '0.9767860811200507', - 'q_ani_low': "0.9762537506990911", 'q_ani_high': "0.9780336875157754", - 'm_ani_low': "0.9758801604653301", "m_ani_high": "0.9776692390768575", - 'ac_ani': '0.9769706656719734','mc_ani': '0.9771552502238963', - 'pfn': 'False'} - exp2 = {'q_ani': '1.0','m_ani': '1.0', - 'q_ani_low': "1.0", 'q_ani_high': "1.0", - 'm_ani_low': "1.0", "m_ani_high": "1.0", - 'ac_ani': '1.0','mc_ani': '1.0', - 'pfn': 'False'} + exp1 = { + "q_ani": "0.9771552502238963", + "m_ani": "0.9767860811200507", + "q_ani_low": "0.9762537506990911", + "q_ani_high": "0.9780336875157754", + "m_ani_low": "0.9758801604653301", + "m_ani_high": "0.9776692390768575", + "ac_ani": "0.9769706656719734", + "mc_ani": "0.9771552502238963", + "pfn": "False", + } + exp2 = { + "q_ani": "1.0", + "m_ani": "1.0", + "q_ani_low": "1.0", + "q_ani_high": "1.0", + "m_ani_low": "1.0", + "m_ani_high": "1.0", + "ac_ani": "1.0", + "mc_ani": "1.0", + "pfn": "False", + } expected_ani_vals = [exp1, exp2] - with open(csvout, 'rt', newline="") as fp: + with open(csvout, newline="") as fp: r = csv.DictReader(fp) - for (row, expected) in zip(r, expected_ani_vals): + for row, expected in zip(r, expected_ani_vals): print(row) assert prefetch_result_names_ci == list(row.keys()) - assert approx_eq(row['query_containment_ani'],expected['q_ani']) - assert approx_eq(row['query_containment_ani_low'], expected['q_ani_low']) - assert 
approx_eq(row['query_containment_ani_high'], expected['q_ani_high']) - assert approx_eq(row['match_containment_ani'], expected['m_ani']) - assert approx_eq(row['match_containment_ani_low'], expected['m_ani_low']) - assert approx_eq(row['match_containment_ani_high'], expected['m_ani_high']) - assert approx_eq(row['max_containment_ani'], expected['mc_ani']) - assert approx_eq(row['average_containment_ani'], expected['ac_ani']) - assert row['potential_false_negative'] == expected['pfn'] + assert approx_eq(row["query_containment_ani"], expected["q_ani"]) + assert approx_eq(row["query_containment_ani_low"], expected["q_ani_low"]) + assert approx_eq(row["query_containment_ani_high"], expected["q_ani_high"]) + assert approx_eq(row["match_containment_ani"], expected["m_ani"]) + assert approx_eq(row["match_containment_ani_low"], expected["m_ani_low"]) + assert approx_eq(row["match_containment_ani_high"], expected["m_ani_high"]) + assert approx_eq(row["max_containment_ani"], expected["mc_ani"]) + assert approx_eq(row["average_containment_ani"], expected["ac_ani"]) + assert row["potential_false_negative"] == expected["pfn"] def test_prefetch_ani_containment_asymmetry(runtmp): # test contained_by asymmetries, viz #2215 - query_sig = utils.get_test_data('47.fa.sig') - merged_sig = utils.get_test_data('47-63-merge.sig') + query_sig = utils.get_test_data("47.fa.sig") + merged_sig = utils.get_test_data("47-63-merge.sig") - runtmp.sourmash('prefetch', query_sig, merged_sig, '-o', - 'query-in-merged.csv') - runtmp.sourmash('prefetch', merged_sig, query_sig, '-o', - 'merged-in-query.csv') + runtmp.sourmash("prefetch", query_sig, merged_sig, "-o", "query-in-merged.csv") + runtmp.sourmash("prefetch", merged_sig, query_sig, "-o", "merged-in-query.csv") - with sourmash_args.FileInputCSV(runtmp.output('query-in-merged.csv')) as r: + with sourmash_args.FileInputCSV(runtmp.output("query-in-merged.csv")) as r: query_in_merged = list(r)[0] - with sourmash_args.FileInputCSV(runtmp.output('merged-in-query.csv')) as r: + with sourmash_args.FileInputCSV(runtmp.output("merged-in-query.csv")) as r: merged_in_query = list(r)[0] - assert query_in_merged['query_containment_ani'] == '1.0' - assert query_in_merged['match_containment_ani'] == '0.9865155060423993' - assert query_in_merged['average_containment_ani'] == '0.9932577530211997' + assert query_in_merged["query_containment_ani"] == "1.0" + assert query_in_merged["match_containment_ani"] == "0.9865155060423993" + assert query_in_merged["average_containment_ani"] == "0.9932577530211997" - assert merged_in_query['match_containment_ani'] == '1.0' - assert merged_in_query['query_containment_ani'] == '0.9865155060423993' - assert merged_in_query['average_containment_ani'] == '0.9932577530211997' + assert merged_in_query["match_containment_ani"] == "1.0" + assert merged_in_query["query_containment_ani"] == "0.9865155060423993" + assert merged_in_query["average_containment_ani"] == "0.9932577530211997" diff --git a/tests/test_sbt.py b/tests/test_sbt.py index a66d0c634e..cfc71d43dd 100644 --- a/tests/test_sbt.py +++ b/tests/test_sbt.py @@ -6,13 +6,11 @@ import pytest import sourmash -from sourmash import (load_one_signature, SourmashSignature, - load_file_as_signatures) +from sourmash import load_one_signature, SourmashSignature, load_file_as_signatures from sourmash.exceptions import IndexNotSupported from sourmash.sbt import SBT, GraphFactory, Leaf, Node -from sourmash.sbtmh import (SigLeaf, load_sbt_index) -from sourmash.sbt_storage import (FSStorage, RedisStorage, - 
IPFSStorage, ZipStorage) +from sourmash.sbtmh import SigLeaf, load_sbt_index +from sourmash.sbt_storage import FSStorage, RedisStorage, IPFSStorage, ZipStorage from sourmash.search import make_jaccard_search_query from sourmash.picklist import SignaturePicklist, PickStyle @@ -24,29 +22,29 @@ def test_simple(runtmp, n_children): root = SBT(factory, d=n_children) leaf1 = Leaf("a", factory()) - leaf1.data.count('AAAAA') - leaf1.data.count('AAAAT') - leaf1.data.count('AAAAC') + leaf1.data.count("AAAAA") + leaf1.data.count("AAAAT") + leaf1.data.count("AAAAC") leaf2 = Leaf("b", factory()) - leaf2.data.count('AAAAA') - leaf2.data.count('AAAAT') - leaf2.data.count('AAAAG') + leaf2.data.count("AAAAA") + leaf2.data.count("AAAAT") + leaf2.data.count("AAAAG") leaf3 = Leaf("c", factory()) - leaf3.data.count('AAAAA') - leaf3.data.count('AAAAT') - leaf3.data.count('CAAAA') + leaf3.data.count("AAAAA") + leaf3.data.count("AAAAT") + leaf3.data.count("CAAAA") leaf4 = Leaf("d", factory()) - leaf4.data.count('AAAAA') - leaf4.data.count('CAAAA') - leaf4.data.count('GAAAA') + leaf4.data.count("AAAAA") + leaf4.data.count("CAAAA") + leaf4.data.count("GAAAA") leaf5 = Leaf("e", factory()) - leaf5.data.count('AAAAA') - leaf5.data.count('AAAAT') - leaf5.data.count('GAAAA') + leaf5.data.count("AAAAA") + leaf5.data.count("AAAAT") + leaf5.data.count("GAAAA") root.add_node(leaf1) root.add_node(leaf2) @@ -58,8 +56,8 @@ def test_simple(runtmp, n_children): def search_kmer(leaf, kmer): return leaf.data.get(kmer) - leaves = [leaf1, leaf2, leaf3, leaf4, leaf5 ] - kmers = [ "AAAAA", "AAAAT", "AAAAG", "CAAAA", "GAAAA" ] + leaves = [leaf1, leaf2, leaf3, leaf4, leaf5] + kmers = ["AAAAA", "AAAAT", "AAAAG", "CAAAA", "GAAAA"] # define an exhaustive search function that looks in all the leaf nodes. 
def search_kmer_in_list(kmer): @@ -75,20 +73,20 @@ def search_kmer_in_list(kmer): for kmer in kmers: assert set(root._find_nodes(search_kmer, kmer)) == search_kmer_in_list(kmer) - print('-----') - print([ x.metadata for x in root._find_nodes(search_kmer, "AAAAA") ]) - print([ x.metadata for x in root._find_nodes(search_kmer, "AAAAT") ]) - print([ x.metadata for x in root._find_nodes(search_kmer, "AAAAG") ]) - print([ x.metadata for x in root._find_nodes(search_kmer, "CAAAA") ]) - print([ x.metadata for x in root._find_nodes(search_kmer, "GAAAA") ]) + print("-----") + print([x.metadata for x in root._find_nodes(search_kmer, "AAAAA")]) + print([x.metadata for x in root._find_nodes(search_kmer, "AAAAT")]) + print([x.metadata for x in root._find_nodes(search_kmer, "AAAAG")]) + print([x.metadata for x in root._find_nodes(search_kmer, "CAAAA")]) + print([x.metadata for x in root._find_nodes(search_kmer, "GAAAA")]) # save SBT to a directory and then reload - root.save(runtmp.output('demo')) - root = SBT.load(runtmp.output('demo')) + root.save(runtmp.output("demo")) + root = SBT.load(runtmp.output("demo")) for kmer in kmers: new_result = {str(r) for r in root._find_nodes(search_kmer, kmer)} - print(*new_result, sep='\n') + print(*new_result, sep="\n") assert new_result == {str(r) for r in search_kmer_in_list(kmer)} @@ -99,29 +97,29 @@ def test_longer_search(n_children): root = SBT(factory, d=n_children) leaf1 = Leaf("a", factory()) - leaf1.data.count('AAAAA') - leaf1.data.count('AAAAT') - leaf1.data.count('AAAAC') + leaf1.data.count("AAAAA") + leaf1.data.count("AAAAT") + leaf1.data.count("AAAAC") leaf2 = Leaf("b", factory()) - leaf2.data.count('AAAAA') - leaf2.data.count('AAAAT') - leaf2.data.count('AAAAG') + leaf2.data.count("AAAAA") + leaf2.data.count("AAAAT") + leaf2.data.count("AAAAG") leaf3 = Leaf("c", factory()) - leaf3.data.count('AAAAA') - leaf3.data.count('AAAAT') - leaf3.data.count('CAAAA') + leaf3.data.count("AAAAA") + leaf3.data.count("AAAAT") + leaf3.data.count("CAAAA") leaf4 = Leaf("d", factory()) - leaf4.data.count('AAAAA') - leaf4.data.count('CAAAA') - leaf4.data.count('GAAAA') + leaf4.data.count("AAAAA") + leaf4.data.count("CAAAA") + leaf4.data.count("GAAAA") leaf5 = Leaf("e", factory()) - leaf5.data.count('AAAAA') - leaf5.data.count('AAAAT') - leaf5.data.count('GAAAA') + leaf5.data.count("AAAAA") + leaf5.data.count("AAAAT") + leaf5.data.count("GAAAA") root.add_node(leaf1) root.add_node(leaf2) @@ -131,32 +129,32 @@ def test_longer_search(n_children): def kmers(k, seq): for start in range(len(seq) - k + 1): - yield seq[start:start + k] + yield seq[start : start + k] def search_transcript(node, seq, threshold): - presence = [ node.data.get(kmer) for kmer in kmers(ksize, seq) ] + presence = [node.data.get(kmer) for kmer in kmers(ksize, seq)] if sum(presence) >= int(threshold * (len(seq) - ksize + 1)): return 1 return 0 - try1 = [ x.metadata for x in root._find_nodes(search_transcript, "AAAAT", 1.0) ] - assert set(try1) == set([ 'a', 'b', 'c', 'e' ]), try1 # no 'd' + try1 = [x.metadata for x in root._find_nodes(search_transcript, "AAAAT", 1.0)] + assert set(try1) == set(["a", "b", "c", "e"]), try1 # no 'd' - try2 = [ x.metadata for x in root._find_nodes(search_transcript, "GAAAAAT", 0.6) ] - assert set(try2) == set([ 'a', 'b', 'c', 'd', 'e' ]) + try2 = [x.metadata for x in root._find_nodes(search_transcript, "GAAAAAT", 0.6)] + assert set(try2) == set(["a", "b", "c", "d", "e"]) - try3 = [ x.metadata for x in root._find_nodes(search_transcript, "GAAAA", 1.0) ] - assert set(try3) == 
set([ 'd', 'e' ]), try3 + try3 = [x.metadata for x in root._find_nodes(search_transcript, "GAAAA", 1.0)] + assert set(try3) == set(["d", "e"]), try3 -#@pytest.mark.parametrize("old_version", ["v1", "v2", "v3", "v4", "v5"]) +# @pytest.mark.parametrize("old_version", ["v1", "v2", "v3", "v4", "v5"]) @pytest.mark.parametrize("old_version", ["v3", "v4", "v5"]) def test_tree_old_load(old_version): - tree_old = SBT.load(utils.get_test_data('{}.sbt.json'.format(old_version)), - leaf_loader=SigLeaf.load) + tree_old = SBT.load( + utils.get_test_data(f"{old_version}.sbt.json"), leaf_loader=SigLeaf.load + ) - tree_cur = SBT.load(utils.get_test_data('v6.sbt.json'), - leaf_loader=SigLeaf.load) + tree_cur = SBT.load(utils.get_test_data("v6.sbt.json"), leaf_loader=SigLeaf.load) testdata1 = utils.get_test_data(utils.SIG_FILES[0]) to_search = load_one_signature(testdata1) @@ -177,8 +175,8 @@ def test_tree_old_load(old_version): def test_load_future(tmpdir): - with open(str(tmpdir.join("v9999.sbt.json")), 'w') as f: - json.dump({'version': 9999}, f) + with open(str(tmpdir.join("v9999.sbt.json")), "w") as f: + json.dump({"version": 9999}, f) with pytest.raises(IndexNotSupported) as excinfo: SBT.load(str(tmpdir.join("v9999.sbt.json"))) @@ -196,21 +194,20 @@ def test_tree_save_load(runtmp, n_children): tree.add_node(leaf) to_search = leaf - print('*' * 60) - print("{}:".format(to_search.metadata)) + print("*" * 60) + print(f"{to_search.metadata}:") search_obj = make_jaccard_search_query(threshold=0.1) old_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} - print(*old_result, sep='\n') + print(*old_result, sep="\n") - tree.save(runtmp.output('demo')) - tree = SBT.load(runtmp.output('demo'), - leaf_loader=SigLeaf.load) + tree.save(runtmp.output("demo")) + tree = SBT.load(runtmp.output("demo"), leaf_loader=SigLeaf.load) - print('*' * 60) - print("{}:".format(to_search.metadata)) + print("*" * 60) + print(f"{to_search.metadata}:") search_obj = make_jaccard_search_query(threshold=0.1) new_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} - print(*new_result, sep='\n') + print(*new_result, sep="\n") assert old_result == new_result @@ -219,7 +216,6 @@ def test_search_minhashes(): factory = GraphFactory(31, 1e5, 4) tree = SBT(factory) - n_leaves = 0 for f in utils.SIG_FILES: sig = load_one_signature(utils.get_test_data(f)) leaf = SigLeaf(os.path.basename(f), sig) @@ -257,12 +253,12 @@ def test_binary_nary_tree(): assert all([len(list(t.leaves())) == n_leaves for t in trees.values()]) results = {} - print('*' * 60) - print("{}:".format(to_search.metadata)) + print("*" * 60) + print(f"{to_search.metadata}:") for d, tree in trees.items(): search_obj = make_jaccard_search_query(threshold=0.1) results[d] = {str(s.signature) for s in tree.find(search_obj, to_search.data)} - print(*results[2], sep='\n') + print(*results[2], sep="\n") assert results[2] == results[5] assert results[5] == results[10] @@ -327,26 +323,26 @@ def test_sbt_fsstorage(runtmp): tree.add_node(leaf) to_search = leaf - print('*' * 60) - print("{}:".format(to_search.metadata)) + print("*" * 60) + print(f"{to_search.metadata}:") search_obj = make_jaccard_search_query(threshold=0.1) old_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} - print(*old_result, sep='\n') + print(*old_result, sep="\n") - with FSStorage(runtmp.location, '.fstree') as storage: - tree.save(runtmp.output('tree.sbt.json'), storage=storage) + with FSStorage(runtmp.location, ".fstree") as storage: + 
tree.save(runtmp.output("tree.sbt.json"), storage=storage) - tree = SBT.load(runtmp.output('tree.sbt.json'), leaf_loader=SigLeaf.load) - print('*' * 60) - print("{}:".format(to_search.metadata)) + tree = SBT.load(runtmp.output("tree.sbt.json"), leaf_loader=SigLeaf.load) + print("*" * 60) + print(f"{to_search.metadata}:") search_obj = make_jaccard_search_query(threshold=0.1) new_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} - print(*new_result, sep='\n') + print(*new_result, sep="\n") assert old_result == new_result assert os.path.exists(runtmp.output(tree.storage.subdir)) - assert os.path.exists(runtmp.output('.fstree')) + assert os.path.exists(runtmp.output(".fstree")) def test_sbt_zipstorage(tmpdir): @@ -361,31 +357,31 @@ def test_sbt_zipstorage(tmpdir): tree.add_node(leaf) to_search = leaf - print('*' * 60) - print("{}:".format(to_search.metadata)) + print("*" * 60) + print(f"{to_search.metadata}:") search_obj = make_jaccard_search_query(threshold=0.1) old_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} - print(*old_result, sep='\n') + print(*old_result, sep="\n") with ZipStorage(str(tmpdir.join("tree.sbt.zip")), mode="w") as storage: tree.save(str(tmpdir.join("tree.sbt.json")), storage=storage) with ZipStorage(str(tmpdir.join("tree.sbt.zip"))) as storage: - tree = SBT.load(str(tmpdir.join("tree.sbt.json")), - leaf_loader=SigLeaf.load, - storage=storage) + tree = SBT.load( + str(tmpdir.join("tree.sbt.json")), leaf_loader=SigLeaf.load, storage=storage + ) - print('*' * 60) - print("{}:".format(to_search.metadata)) + print("*" * 60) + print(f"{to_search.metadata}:") search_obj = make_jaccard_search_query(threshold=0.1) new_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} - print(*new_result, sep='\n') + print(*new_result, sep="\n") assert old_result == new_result def test_sbt_ipfsstorage(runtmp): - ipfshttpclient = pytest.importorskip('ipfshttpclient') + ipfshttpclient = pytest.importorskip("ipfshttpclient") factory = GraphFactory(31, 1e5, 4) tree = SBT(factory) @@ -397,34 +393,34 @@ def test_sbt_ipfsstorage(runtmp): tree.add_node(leaf) to_search = leaf - print('*' * 60) - print("{}:".format(to_search.metadata)) + print("*" * 60) + print(f"{to_search.metadata}:") search_obj = make_jaccard_search_query(threshold=0.1) old_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} - print(*old_result, sep='\n') + print(*old_result, sep="\n") try: with IPFSStorage() as storage: - tree.save(runtmp.output('tree.sbt.json'), storage=storage) + tree.save(runtmp.output("tree.sbt.json"), storage=storage) except ipfshttpclient.exceptions.ConnectionError: pytest.xfail("ipfs not installed/functioning probably") with IPFSStorage() as storage: - tree = SBT.load(runtmp.output('tree.sbt.json'), - leaf_loader=SigLeaf.load, - storage=storage) + tree = SBT.load( + runtmp.output("tree.sbt.json"), leaf_loader=SigLeaf.load, storage=storage + ) - print('*' * 60) - print("{}:".format(to_search.metadata)) + print("*" * 60) + print(f"{to_search.metadata}:") search_obj = make_jaccard_search_query(threshold=0.1) new_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} - print(*new_result, sep='\n') + print(*new_result, sep="\n") assert old_result == new_result def test_sbt_redisstorage(runtmp): - redis = pytest.importorskip('redis') + redis = pytest.importorskip("redis") factory = GraphFactory(31, 1e5, 4) tree = SBT(factory) @@ -435,28 +431,28 @@ def test_sbt_redisstorage(runtmp): tree.add_node(leaf) 
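The Redis and IPFS storage tests in this area follow a common pattern for optional external services: pytest.importorskip skips the test when the client library isn't installed, and a try/except around the first connection turns an unreachable daemon into an xfail rather than an error. A generic sketch of that pattern (the test body is illustrative, not from this diff):

    import pytest

    redis = pytest.importorskip("redis")  # skip if the client library is absent


    def test_requires_redis_server():
        try:
            conn = redis.Redis()
            conn.ping()  # raises ConnectionError when no server is listening
        except redis.exceptions.ConnectionError:
            pytest.xfail("couldn't connect to a redis server")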
to_search = leaf - print('*' * 60) - print("{}:".format(to_search.metadata)) + print("*" * 60) + print(f"{to_search.metadata}:") search_obj = make_jaccard_search_query(threshold=0.1) old_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} - print(*old_result, sep='\n') + print(*old_result, sep="\n") try: with RedisStorage() as storage: - tree.save(runtmp.output('tree.sbt.json'), storage=storage) + tree.save(runtmp.output("tree.sbt.json"), storage=storage) except redis.exceptions.ConnectionError: pytest.xfail("Couldn't connect to redis server") with RedisStorage() as storage: - tree = SBT.load(runtmp.output('tree.sbt.json'), - leaf_loader=SigLeaf.load, - storage=storage) + tree = SBT.load( + runtmp.output("tree.sbt.json"), leaf_loader=SigLeaf.load, storage=storage + ) - print('*' * 60) - print("{}:".format(to_search.metadata)) + print("*" * 60) + print(f"{to_search.metadata}:") search_obj = make_jaccard_search_query(threshold=0.1) new_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} - print(*new_result, sep='\n') + print(*new_result, sep="\n") assert old_result == new_result @@ -475,12 +471,12 @@ def test_save_zip(tmpdir): new_tree = SBT.load(str(newsbt), leaf_loader=SigLeaf.load) assert isinstance(new_tree.storage, ZipStorage) - assert new_tree.storage.list_sbts() == ['new.sbt.json'] + assert new_tree.storage.list_sbts() == ["new.sbt.json"] to_search = load_one_signature(utils.get_test_data(utils.SIG_FILES[0])) print("*" * 60) - print("{}:".format(to_search)) + print(f"{to_search}:") search_obj = make_jaccard_search_query(threshold=0.1) old_result = {str(s.signature) for s in tree.find(search_obj, to_search)} new_result = {str(s.signature) for s in new_tree.find(search_obj, to_search)} @@ -502,7 +498,7 @@ def test_load_zip(tmpdir): to_search = load_one_signature(utils.get_test_data(utils.SIG_FILES[0])) print("*" * 60) - print("{}:".format(to_search)) + print(f"{to_search}:") search_obj = make_jaccard_search_query(threshold=0.1) new_result = {str(s.signature) for s in tree.find(search_obj, to_search)} print(*new_result, sep="\n") @@ -516,7 +512,7 @@ def test_load_zip_uncompressed(tmpdir): testdata = utils.get_test_data("v6.sbt.zip") testsbt = tmpdir.join("v6.sbt.json") - with zipfile.ZipFile(testdata, 'r') as z: + with zipfile.ZipFile(testdata, "r") as z: z.extractall(str(tmpdir)) tree = SBT.load(str(testsbt), leaf_loader=SigLeaf.load) @@ -524,7 +520,7 @@ def test_load_zip_uncompressed(tmpdir): to_search = load_one_signature(utils.get_test_data(utils.SIG_FILES[0])) print("*" * 60) - print("{}:".format(to_search)) + print(f"{to_search}:") search_obj = make_jaccard_search_query(threshold=0.1) new_result = {str(s.signature) for s in tree.find(search_obj, to_search)} print(*new_result, sep="\n") @@ -532,11 +528,11 @@ def test_load_zip_uncompressed(tmpdir): def test_tree_repair(): - tree_repair = SBT.load(utils.get_test_data('leaves.sbt.json'), - leaf_loader=SigLeaf.load) + tree_repair = SBT.load( + utils.get_test_data("leaves.sbt.json"), leaf_loader=SigLeaf.load + ) - tree_cur = SBT.load(utils.get_test_data('v3.sbt.json'), - leaf_loader=SigLeaf.load) + tree_cur = SBT.load(utils.get_test_data("v3.sbt.json"), leaf_loader=SigLeaf.load) testdata1 = utils.get_test_data(utils.SIG_FILES[0]) to_search = load_one_signature(testdata1) @@ -550,8 +546,9 @@ def test_tree_repair(): def test_tree_repair_insert(): - tree_repair = SBT.load(utils.get_test_data('leaves.sbt.json'), - leaf_loader=SigLeaf.load) + tree_repair = SBT.load( + 
utils.get_test_data("leaves.sbt.json"), leaf_loader=SigLeaf.load + ) for f in utils.SIG_FILES: sig = load_one_signature(utils.get_test_data(f)) @@ -579,23 +576,23 @@ def test_save_sparseness(runtmp, n_children): tree.add_node(leaf) to_search = leaf - print('*' * 60) - print("{}:".format(to_search.metadata)) + print("*" * 60) + print(f"{to_search.metadata}:") search_obj = make_jaccard_search_query(threshold=0.1) old_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} - print(*old_result, sep='\n') + print(*old_result, sep="\n") - tree.save(runtmp.output('demo'), sparseness=1.0) - tree_loaded = SBT.load(runtmp.output('demo'), - leaf_loader=SigLeaf.load) + tree.save(runtmp.output("demo"), sparseness=1.0) + tree_loaded = SBT.load(runtmp.output("demo"), leaf_loader=SigLeaf.load) assert all(not isinstance(n, Node) for _, n in tree_loaded) - print('*' * 60) - print("{}:".format(to_search.metadata)) - new_result = {str(s.signature) for s in tree_loaded.find(search_obj, - to_search.data)} - print(*new_result, sep='\n') + print("*" * 60) + print(f"{to_search.metadata}:") + new_result = { + str(s.signature) for s in tree_loaded.find(search_obj, to_search.data) + } + print(*new_result, sep="\n") assert old_result == new_result @@ -615,8 +612,8 @@ def test_sbt_as_index_select(): factory = GraphFactory(31, 1e5, 4) tree = SBT(factory, d=2) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig')) - sig63 = load_one_signature(utils.get_test_data('63.fa.sig')) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig")) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig")) tree.insert(sig47) tree.insert(sig63) @@ -624,7 +621,7 @@ def test_sbt_as_index_select(): xx = tree.select(ksize=31) assert xx == tree - xx = tree.select(moltype='DNA') + xx = tree.select(moltype="DNA") assert xx == tree xx = tree.select(abund=False) @@ -634,7 +631,7 @@ def test_sbt_as_index_select(): tree.select(ksize=21) with pytest.raises(ValueError): - tree.select(moltype='protein') + tree.select(moltype="protein") with pytest.raises(ValueError): tree.select(abund=True) @@ -646,15 +643,15 @@ def test_sbt_as_index_select_picklist(): factory = GraphFactory(31, 1e5, 4) tree = SBT(factory, d=2) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig')) - sig63 = load_one_signature(utils.get_test_data('63.fa.sig')) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig")) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig")) tree.insert(sig47) tree.insert(sig63) # construct a picklist... - picklist = SignaturePicklist('md5prefix8') - picklist.init(['09a08691']) + picklist = SignaturePicklist("md5prefix8") + picklist.init(["09a08691"]) # select on picklist tree = tree.select(picklist=picklist) @@ -663,7 +660,7 @@ def test_sbt_as_index_select_picklist(): ss = siglist[0] assert ss.minhash.ksize == 31 - assert ss.md5sum().startswith('09a08691c') + assert ss.md5sum().startswith("09a08691c") def test_sbt_as_index_select_picklist_exclude(): @@ -672,15 +669,15 @@ def test_sbt_as_index_select_picklist_exclude(): factory = GraphFactory(31, 1e5, 4) tree = SBT(factory, d=2) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig')) - sig63 = load_one_signature(utils.get_test_data('63.fa.sig')) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig")) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig")) tree.insert(sig47) tree.insert(sig63) # construct a picklist... 
- picklist = SignaturePicklist('md5prefix8', pickstyle=PickStyle.EXCLUDE) - picklist.init(['09a08691']) + picklist = SignaturePicklist("md5prefix8", pickstyle=PickStyle.EXCLUDE) + picklist.init(["09a08691"]) # select on picklist tree = tree.select(picklist=picklist) @@ -689,7 +686,7 @@ def test_sbt_as_index_select_picklist_exclude(): ss = siglist[0] assert ss.minhash.ksize == 31 - assert ss.md5sum().startswith('38729c637') + assert ss.md5sum().startswith("38729c637") def test_sbt_as_index_find_picklist(): @@ -698,15 +695,15 @@ def test_sbt_as_index_find_picklist(): factory = GraphFactory(31, 1e5, 4) tree = SBT(factory, d=2) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig')) - sig63 = load_one_signature(utils.get_test_data('63.fa.sig')) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig")) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig")) tree.insert(sig47) tree.insert(sig63) # construct a picklist... - picklist = SignaturePicklist('md5prefix8') - picklist.init(['09a08691']) + picklist = SignaturePicklist("md5prefix8") + picklist.init(["09a08691"]) # run a 'find' with sig63, should find 47 and 63 both. search_obj = make_jaccard_search_query(do_containment=True, threshold=0.0) @@ -723,7 +720,7 @@ def test_sbt_as_index_find_picklist(): # and check that it is the expected one! ss = results[0].signature assert ss.minhash.ksize == 31 - assert ss.md5sum().startswith('09a08691c') + assert ss.md5sum().startswith("09a08691c") def test_sbt_as_index_find_picklist_exclude(): @@ -732,15 +729,15 @@ def test_sbt_as_index_find_picklist_exclude(): factory = GraphFactory(31, 1e5, 4) tree = SBT(factory, d=2) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig')) - sig63 = load_one_signature(utils.get_test_data('63.fa.sig')) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig")) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig")) tree.insert(sig47) tree.insert(sig63) # construct a picklist... - picklist = SignaturePicklist('md5prefix8', pickstyle=PickStyle.EXCLUDE) - picklist.init(['09a08691']) + picklist = SignaturePicklist("md5prefix8", pickstyle=PickStyle.EXCLUDE) + picklist.init(["09a08691"]) # run a 'find' with sig63, should find 47 and 63 both. search_obj = make_jaccard_search_query(do_containment=True, threshold=0.0) @@ -757,7 +754,7 @@ def test_sbt_as_index_find_picklist_exclude(): # and check that it is the expected one! ss = results[0].signature assert ss.minhash.ksize == 31 - assert ss.md5sum().startswith('38729c637') + assert ss.md5sum().startswith("38729c637") def test_sbt_as_index_find_picklist_twice(): @@ -766,15 +763,15 @@ def test_sbt_as_index_find_picklist_twice(): factory = GraphFactory(31, 1e5, 4) tree = SBT(factory, d=2) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig')) - sig63 = load_one_signature(utils.get_test_data('63.fa.sig')) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig")) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig")) tree.insert(sig47) tree.insert(sig63) # construct a picklist... - picklist = SignaturePicklist('md5prefix8') - picklist.init(['09a08691']) + picklist = SignaturePicklist("md5prefix8") + picklist.init(["09a08691"]) # run a 'find' with sig63, should find 47 and 63 both. 
search_obj = make_jaccard_search_query(do_containment=True, threshold=0.0) @@ -787,7 +784,9 @@ def test_sbt_as_index_find_picklist_twice(): with pytest.raises(ValueError): tree = tree.select(picklist=picklist) - assert "we do not (yet) support multiple picklists for SBT databases" in str(exc) + assert "we do not (yet) support multiple picklists for SBT databases" in str( + exc + ) def test_sbt_as_index_signatures(): @@ -795,8 +794,8 @@ def test_sbt_as_index_signatures(): factory = GraphFactory(31, 1e5, 4) tree = SBT(factory, d=2) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig')) - sig63 = load_one_signature(utils.get_test_data('63.fa.sig')) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig")) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig")) tree.insert(sig47) tree.insert(sig63) @@ -813,9 +812,9 @@ def test_sbt_gather_threshold_1(): factory = GraphFactory(31, 1e5, 4) tree = SBT(factory, d=2) - sig2 = load_one_signature(utils.get_test_data('2.fa.sig'), ksize=31) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig'), ksize=31) - sig63 = load_one_signature(utils.get_test_data('63.fa.sig'), ksize=31) + sig2 = load_one_signature(utils.get_test_data("2.fa.sig"), ksize=31) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) tree.insert(sig47) tree.insert(sig63) @@ -861,7 +860,7 @@ def test_sbt_gather_threshold_1(): assert name is None # check with a too-high threshold -> should be no results. - print('len mh', len(new_mh)) + print("len mh", len(new_mh)) with pytest.raises(ValueError): tree.best_containment(SourmashSignature(new_mh), threshold_bp=5000) @@ -871,9 +870,9 @@ def test_sbt_gather_threshold_5(): factory = GraphFactory(31, 1e5, 4) tree = SBT(factory, d=2) - sig2 = load_one_signature(utils.get_test_data('2.fa.sig'), ksize=31) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig'), ksize=31) - sig63 = load_one_signature(utils.get_test_data('63.fa.sig'), ksize=31) + sig2 = load_one_signature(utils.get_test_data("2.fa.sig"), ksize=31) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) tree.insert(sig47) tree.insert(sig63) @@ -902,7 +901,7 @@ def test_sbt_gather_threshold_5(): assert name is None # now, check with a threshold_bp that should be meet-able. 
-    results = tree.best_containment(SourmashSignature(new_mh), threshold_bp=5000)
+    result = tree.best_containment(SourmashSignature(new_mh), threshold_bp=5000)
     assert result
     containment, match_sig, name = result
     assert containment == 1.0

@@ -913,9 +912,9 @@
 @utils.in_tempdir
 def test_gather_single_return(c):
     # test gather() number of returns
-    sig2file = utils.get_test_data('2.fa.sig')
-    sig47file = utils.get_test_data('47.fa.sig')
-    sig63file = utils.get_test_data('63.fa.sig')
+    sig2file = utils.get_test_data("2.fa.sig")
+    sig47file = utils.get_test_data("47.fa.sig")
+    sig63file = utils.get_test_data("63.fa.sig")

     sig2 = load_one_signature(sig2file, ksize=31)
     sig47 = load_one_signature(sig47file, ksize=31)

@@ -953,10 +952,10 @@ def test_sbt_jaccard_ordering(runtmp):
     def _intersect(x, y):
         return x.intersection_and_union_size(y)[0]

-    print('a intersect b:', _intersect(a, b))
-    print('a intersect c:', _intersect(a, c))
-    print('a jaccard b:', a.jaccard(b))
-    print('a jaccard c:', a.jaccard(c))
+    print("a intersect b:", _intersect(a, b))
+    print("a intersect c:", _intersect(a, c))
+    print("a jaccard b:", a.jaccard(b))
+    print("a jaccard c:", a.jaccard(c))

     assert _intersect(a, b) > _intersect(a, c)
     assert a.jaccard(b) < a.jaccard(c)

@@ -965,9 +964,9 @@ def _intersect(x, y):
     assert a.jaccard(c) > 0.15

     # now - make signatures, try out :)
-    ss_a = sourmash.SourmashSignature(a, name='A')
-    ss_b = sourmash.SourmashSignature(b, name='B')
-    ss_c = sourmash.SourmashSignature(c, name='C')
+    ss_a = sourmash.SourmashSignature(a, name="A")
+    ss_b = sourmash.SourmashSignature(b, name="B")
+    ss_c = sourmash.SourmashSignature(c, name="C")

     factory = GraphFactory(31, 1e5, 4)
     db = SBT(factory, d=2)

@@ -988,16 +987,21 @@ def test_sbt_protein_command_index(runtmp):
     c = runtmp

     # test command-line creation of SBT database with protein sigs
-    sigfile1 = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig')
-    sigfile2 = utils.get_test_data('prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig')
+    sigfile1 = utils.get_test_data(
+        "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig"
+    )
+    sigfile2 = utils.get_test_data(
+        "prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig"
+    )

-    db_out = c.output('protein.sbt.zip')
+    db_out = c.output("protein.sbt.zip")

-    c.run_sourmash('index', db_out, sigfile1, sigfile2,
-                   '--scaled', '100', '-k', '19', '--protein')
+    c.run_sourmash(
+        "index", db_out, sigfile1, sigfile2, "--scaled", "100", "-k", "19", "--protein"
+    )

     # check to make sure .sbt.protein directory doesn't get created
-    assert not os.path.exists(c.output('.sbt.protein'))
+    assert not os.path.exists(c.output(".sbt.protein"))

     db2 = load_sbt_index(db_out)

@@ -1005,14 +1009,19 @@ def test_sbt_protein_command_index(runtmp):
     sig2 = sourmash.load_one_signature(sigfile2)

     # check reconstruction --
-    mh_list = [ x.minhash for x in db2.signatures() ]
+    mh_list = [x.minhash for x in db2.signatures()]
     assert len(mh_list) == 2
     assert sig1.minhash in mh_list
     assert sig2.minhash in mh_list

     # and search, gather
-    results = db2.search(sig1, threshold=0.0, ignore_abundance=True,
-                         do_containment=False, best_only=False)
+    results = db2.search(
+        sig1,
+        threshold=0.0,
+        ignore_abundance=True,
+        do_containment=False,
+        best_only=False,
+    )
     assert len(results) == 2

     result = db2.best_containment(sig2)

@@ -1024,13 +1033,18 @@
 @utils.in_tempdir
 def test_sbt_protein_search_no_threshold(c):
     # test the '.search' method on SBTs
w/no threshold - sigfile1 = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') + sigfile1 = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) - db_out = c.output('protein.sbt.zip') + db_out = c.output("protein.sbt.zip") - c.run_sourmash('index', db_out, sigfile1, sigfile2, - '--scaled', '100', '-k', '19', '--protein') + c.run_sourmash( + "index", db_out, sigfile1, sigfile2, "--scaled", "100", "-k", "19", "--protein" + ) db2 = load_sbt_index(db_out) @@ -1038,34 +1052,41 @@ def test_sbt_protein_search_no_threshold(c): # and search, gather with pytest.raises(TypeError) as exc: - results = db2.search(sig1) + db2.search(sig1) assert "'search' requires 'threshold'" in str(exc) @utils.in_thisdir def test_sbt_protein_command_search(c): # test command-line search/gather of SBT database with protein sigs - sigfile1 = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/protein.sbt.zip') + sigfile1 = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/protein.sbt.zip") - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') - assert '2 matches' in c.last_result.out + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") + assert "2 matches" in c.last_result.out - c.run_sourmash('gather', sigfile1, db_out) - assert 'found 1 matches total' in c.last_result.out - assert 'the recovered matches hit 100.0% of the query' in c.last_result.out + c.run_sourmash("gather", sigfile1, db_out) + assert "found 1 matches total" in c.last_result.out + assert "the recovered matches hit 100.0% of the query" in c.last_result.out @utils.in_tempdir def test_sbt_hp_command_index(c): # test command-line creation of SBT database with hp sigs - sigfile1 = utils.get_test_data('prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/hp/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') + sigfile1 = utils.get_test_data( + "prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/hp/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) - db_out = c.output('hp.sbt.zip') + db_out = c.output("hp.sbt.zip") - c.run_sourmash('index', db_out, sigfile1, sigfile2, - '--scaled', '100', '-k', '19', '--hp') + c.run_sourmash( + "index", db_out, sigfile1, sigfile2, "--scaled", "100", "-k", "19", "--hp" + ) db2 = load_sbt_index(db_out) @@ -1073,14 +1094,19 @@ def test_sbt_hp_command_index(c): sig2 = sourmash.load_one_signature(sigfile2) # check reconstruction -- - mh_list = [ x.minhash for x in db2.signatures() ] + mh_list = [x.minhash for x in db2.signatures()] assert len(mh_list) == 2 assert sig1.minhash in mh_list assert sig2.minhash in mh_list # and search, gather - results = db2.search(sig1, threshold=0.0, ignore_abundance=True, - do_containment=False, best_only=False) + results = db2.search( + sig1, + threshold=0.0, + ignore_abundance=True, + do_containment=False, + best_only=False, + ) assert results result = db2.best_containment(sig2) @@ -1092,27 +1118,34 @@ def test_sbt_hp_command_index(c): @utils.in_thisdir def test_sbt_hp_command_search(c): # test command-line search/gather of SBT database with hp sigs - sigfile1 = 
utils.get_test_data('prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/hp.sbt.zip') + sigfile1 = utils.get_test_data( + "prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/hp.sbt.zip") - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') - assert '2 matches' in c.last_result.out + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") + assert "2 matches" in c.last_result.out - c.run_sourmash('gather', sigfile1, db_out, '--threshold', '0.0') - assert 'found 1 matches total' in c.last_result.out - assert 'the recovered matches hit 100.0% of the query' in c.last_result.out + c.run_sourmash("gather", sigfile1, db_out, "--threshold", "0.0") + assert "found 1 matches total" in c.last_result.out + assert "the recovered matches hit 100.0% of the query" in c.last_result.out @utils.in_tempdir def test_sbt_dayhoff_command_index(c): # test command-line creation of SBT database with dayhoff sigs - sigfile1 = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - sigfile2 = utils.get_test_data('prot/dayhoff/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') + sigfile1 = utils.get_test_data( + "prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + sigfile2 = utils.get_test_data( + "prot/dayhoff/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) - db_out = c.output('dayhoff.sbt.zip') + db_out = c.output("dayhoff.sbt.zip") - c.run_sourmash('index', db_out, sigfile1, sigfile2, - '--scaled', '100', '-k', '19', '--dayhoff') + c.run_sourmash( + "index", db_out, sigfile1, sigfile2, "--scaled", "100", "-k", "19", "--dayhoff" + ) db2 = load_sbt_index(db_out) @@ -1120,14 +1153,19 @@ def test_sbt_dayhoff_command_index(c): sig2 = sourmash.load_one_signature(sigfile2) # check reconstruction -- - mh_list = [ x.minhash for x in db2.signatures() ] + mh_list = [x.minhash for x in db2.signatures()] assert len(mh_list) == 2 assert sig1.minhash in mh_list assert sig2.minhash in mh_list # and search, gather - results = db2.search(sig1, threshold=0.0, ignore_abundance=True, - do_containment=False, best_only=False) + results = db2.search( + sig1, + threshold=0.0, + ignore_abundance=True, + do_containment=False, + best_only=False, + ) assert len(results) == 2 result = db2.best_containment(sig2) @@ -1139,21 +1177,23 @@ def test_sbt_dayhoff_command_index(c): @utils.in_thisdir def test_sbt_dayhoff_command_search(c): # test command-line search/gather of SBT database with dayhoff sigs - sigfile1 = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - db_out = utils.get_test_data('prot/dayhoff.sbt.zip') + sigfile1 = utils.get_test_data( + "prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + db_out = utils.get_test_data("prot/dayhoff.sbt.zip") - c.run_sourmash('search', sigfile1, db_out, '--threshold', '0.0') - assert '2 matches' in c.last_result.out + c.run_sourmash("search", sigfile1, db_out, "--threshold", "0.0") + assert "2 matches" in c.last_result.out - c.run_sourmash('gather', sigfile1, db_out, '--threshold', '0.0') - assert 'found 1 matches total' in c.last_result.out - assert 'the recovered matches hit 100.0% of the query' in c.last_result.out + c.run_sourmash("gather", sigfile1, db_out, "--threshold", "0.0") + assert "found 1 matches total" in c.last_result.out + assert "the recovered matches hit 100.0% of the query" in c.last_result.out def test_sbt_node_cache(): - tree = SBT.load(utils.get_test_data('v6.sbt.json'), - 
leaf_loader=SigLeaf.load, - cache_size=1) + tree = SBT.load( + utils.get_test_data("v6.sbt.json"), leaf_loader=SigLeaf.load, cache_size=1 + ) testdata1 = utils.get_test_data(utils.SIG_FILES[0]) to_search = load_one_signature(testdata1) @@ -1172,28 +1212,28 @@ def test_sbt_node_cache(): def test_sbt_no_containment_on_num(): - tree = SBT.load(utils.get_test_data('v6.sbt.json'), - leaf_loader=SigLeaf.load, - cache_size=1) + tree = SBT.load( + utils.get_test_data("v6.sbt.json"), leaf_loader=SigLeaf.load, cache_size=1 + ) testdata1 = utils.get_test_data(utils.SIG_FILES[0]) to_search = load_one_signature(testdata1) search_obj = make_jaccard_search_query(do_containment=True, threshold=0.05) with pytest.raises(TypeError) as exc: - results = list(tree.find(search_obj, to_search)) + list(tree.find(search_obj, to_search)) assert "this search requires a scaled signature" in str(exc) def test_build_sbt_zip_with_dups(runtmp): - dups_data = utils.get_test_data('duplicate-sigs') + dups_data = utils.get_test_data("duplicate-sigs") all_sigs = set(sourmash.load_file_as_signatures(dups_data)) assert len(all_sigs) == 4 - runtmp.run_sourmash('index', 'dups.sbt.zip', dups_data) - outfile = runtmp.output('dups.sbt.zip') + runtmp.run_sourmash("index", "dups.sbt.zip", dups_data) + outfile = runtmp.output("dups.sbt.zip") sbt_sigs = set(sourmash.load_file_as_signatures(outfile)) assert len(sbt_sigs) == 4 @@ -1202,17 +1242,17 @@ def test_build_sbt_zip_with_dups(runtmp): def test_build_sbt_zip_with_dups_exists(runtmp): - dups_data = utils.get_test_data('duplicate-sigs') + dups_data = utils.get_test_data("duplicate-sigs") all_sigs = set(sourmash.load_file_as_signatures(dups_data)) assert len(all_sigs) == 4 - runtmp.run_sourmash('index', 'dups.sbt.zip', dups_data) - outfile = runtmp.output('dups.sbt.zip') + runtmp.run_sourmash("index", "dups.sbt.zip", dups_data) + outfile = runtmp.output("dups.sbt.zip") # run again, to see what happens :) - runtmp.run_sourmash('index', 'dups.sbt.zip', dups_data) - outfile = runtmp.output('dups.sbt.zip') + runtmp.run_sourmash("index", "dups.sbt.zip", dups_data) + outfile = runtmp.output("dups.sbt.zip") sbt_sigs = set(sourmash.load_file_as_signatures(outfile)) assert len(sbt_sigs) == 4 @@ -1221,13 +1261,13 @@ def test_build_sbt_zip_with_dups_exists(runtmp): def test_build_sbt_json_with_dups(runtmp): - dups_data = utils.get_test_data('duplicate-sigs') + dups_data = utils.get_test_data("duplicate-sigs") all_sigs = set(sourmash.load_file_as_signatures(dups_data)) assert len(all_sigs) == 4 - runtmp.run_sourmash('index', 'dups.sbt.json', dups_data) - outfile = runtmp.output('dups.sbt.json') + runtmp.run_sourmash("index", "dups.sbt.json", dups_data) + outfile = runtmp.output("dups.sbt.json") sbt_sigs = set(sourmash.load_file_as_signatures(outfile)) assert len(sbt_sigs) == 4 @@ -1236,17 +1276,17 @@ def test_build_sbt_json_with_dups(runtmp): def test_build_sbt_json_with_dups_exists(runtmp): - dups_data = utils.get_test_data('duplicate-sigs') + dups_data = utils.get_test_data("duplicate-sigs") all_sigs = set(sourmash.load_file_as_signatures(dups_data)) assert len(all_sigs) == 4 - runtmp.run_sourmash('index', 'dups.sbt.json', dups_data) - outfile = runtmp.output('dups.sbt.json') + runtmp.run_sourmash("index", "dups.sbt.json", dups_data) + outfile = runtmp.output("dups.sbt.json") # run again, see what happens! 
-    runtmp.run_sourmash('index', 'dups.sbt.json', dups_data)
-    outfile = runtmp.output('dups.sbt.json')
+    runtmp.run_sourmash("index", "dups.sbt.json", dups_data)
+    outfile = runtmp.output("dups.sbt.json")

     sbt_sigs = set(sourmash.load_file_as_signatures(outfile))
     assert len(sbt_sigs) == 4
@@ -1258,9 +1298,9 @@ def test_load_fail_on_file_not_dir(runtmp):
     # make sure the load function raises a ValueError for {filename}/sbt,
     # rather than a NotADirectoryError
-    filename = runtmp.output('foo')
-    with open(filename, 'wt') as fp:
-        fp.write('something')
+    filename = runtmp.output("foo")
+    with open(filename, "w") as fp:
+        fp.write("something")

-    with pytest.raises(ValueError) as exc:
-        x = SBT.load(runtmp.output('foo/bar.sbt.json'))
+    with pytest.raises(ValueError):
+        SBT.load(runtmp.output("foo/bar.sbt.json"))
diff --git a/tests/test_search.py b/tests/test_search.py
index a1b8171cfd..c9c6d601cc 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -5,9 +5,13 @@
 import sourmash_tst_utils as utils
 from sourmash import search, SourmashSignature, MinHash, load_one_signature
-from sourmash.search import (make_jaccard_search_query,
-                             make_containment_query,
-                             SearchResult, PrefetchResult, GatherResult)
+from sourmash.search import (
+    make_jaccard_search_query,
+    make_containment_query,
+    SearchResult,
+    PrefetchResult,
+    GatherResult,
+)

 from sourmash.index import LinearIndex
@@ -20,8 +24,7 @@ def test_make_jaccard_search_query():


 def test_make_jaccard_search_query_cont():
-    search_obj = make_jaccard_search_query(do_containment=True,
-                                           threshold=0)
+    search_obj = make_jaccard_search_query(do_containment=True, threshold=0)

     assert search_obj.score_fn == search_obj.score_containment
     assert search_obj.require_scaled
@@ -29,8 +32,7 @@ def test_make_jaccard_search_query_cont():


 def test_make_jaccard_search_query_max_cont():
-    search_obj = make_jaccard_search_query(do_max_containment=True,
-                                           threshold=0)
+    search_obj = make_jaccard_search_query(do_max_containment=True, threshold=0)

     assert search_obj.score_fn == search_obj.score_max_containment
     assert search_obj.require_scaled
@@ -55,16 +57,18 @@ def test_make_jaccard_search_query_no_threshold_none():

 def test_make_jaccard_search_query_cont_and_max_cont():
     with pytest.raises(TypeError) as exc:
-        search_obj = make_jaccard_search_query(do_containment=True,
-                                               do_max_containment=True)
+        make_jaccard_search_query(do_containment=True, do_max_containment=True)

-    assert str(exc.value) == "'do_containment' and 'do_max_containment' cannot both be True"
+    assert (
+        str(exc.value)
+        == "'do_containment' and 'do_max_containment' cannot both be True"
+    )


 def test_cont_requires_scaled():
     search_obj = make_jaccard_search_query(do_containment=True)
     assert search_obj.require_scaled
-    
+
     mh = MinHash(n=500, ksize=31)
     with pytest.raises(TypeError) as exc:
         search_obj.check_is_compatible(SourmashSignature(mh))
@@ -73,7 +77,7 @@ def test_cont_requires_scaled():

 def test_search_requires_flat():
     search_obj = make_jaccard_search_query()
-    
+
     mh = MinHash(n=500, ksize=31, track_abundance=True)
     with pytest.raises(TypeError) as exc:
         search_obj.check_is_compatible(SourmashSignature(mh))
@@ -164,7 +168,7 @@ def test_make_containment_query_num_minhash():
         mh.add_hash(i)

     with pytest.raises(TypeError) as exc:
-        search_obj = make_containment_query(mh, 5e4)
+        make_containment_query(mh, 5e4)

     assert str(exc.value) == "query signature must be calculated with scaled"
@@ -177,7 +181,7 @@ def test_make_containment_query_empty_minhash():
         mh.add_hash(i)

     with pytest.raises(TypeError) as exc:
-        search_obj = make_containment_query(mh, -1)
+        make_containment_query(mh, -1)

     assert str(exc.value) == "threshold_bp must be non-negative"
@@ -191,7 +195,7 @@ def test_make_containment_query_high_threshold():

     # effective threshold > 1; raise ValueError
     with pytest.raises(ValueError):
-        search_obj = make_containment_query(mh, 200000)
+        make_containment_query(mh, 200000)


 class FakeIndex(LinearIndex):
@@ -240,29 +244,31 @@ def test_search_with_abund_query():
     query = SourmashSignature(mh)

     with pytest.raises(TypeError):
-        search.search_databases_with_abund_query(query, [],
-                                                 threshold=0,
-                                                 do_containment=True)
+        search.search_databases_with_abund_query(
+            query, [], threshold=0, do_containment=True
+        )

     with pytest.raises(TypeError):
-        search.search_databases_with_abund_query(query, [],
-                                                 threshold=0,
-                                                 do_max_containment=True)
+        search.search_databases_with_abund_query(
+            query, [], threshold=0, do_max_containment=True
+        )


 def test_scaledSearchResult():
     # check that values get stored/calculated correctly
-    ss47_file = utils.get_test_data('47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")
     ss4763 = ss4763.to_mutable()
     ss4763.filename = ss4763_file

     scaled = ss47.minhash.scaled

-    res = SearchResult(ss47, ss4763, cmp_scaled=scaled, similarity= ss47.contained_by(ss4763))
+    res = SearchResult(
+        ss47, ss4763, cmp_scaled=scaled, similarity=ss47.contained_by(ss4763)
+    )

     assert res.query_name == ss47.name
     assert res.match_name == ss4763.name
@@ -271,16 +277,16 @@ def test_scaledSearchResult():
     assert res.cmp_scaled == 1000
     assert res.query_abundance == ss47.minhash.track_abundance
     assert res.match_abundance == ss4763.minhash.track_abundance
-#    assert res.query_bp == len(ss47.minhash) * scaled
-#    assert res.match_bp == len(ss4763.minhash) * scaled
+    # assert res.query_bp == len(ss47.minhash) * scaled
+    # assert res.match_bp == len(ss4763.minhash) * scaled
     assert res.ksize == 31
-    assert res.moltype == 'DNA'
-    assert res.query_filename == '47.fa'
+    assert res.moltype == "DNA"
+    assert res.query_filename == "47.fa"
     assert res.match_filename == ss4763_file
     assert res.query_md5 == ss47.md5sum()
     assert res.match_md5 == ss4763.md5sum()
-    # assert res.query_n_hashes == len(ss47.minhash) 
-    # assert res.match_n_hashes == len(ss4763.minhash) 
+    # assert res.query_n_hashes == len(ss47.minhash)
+    # assert res.match_n_hashes == len(ss4763.minhash)
     assert res.md5 == ss4763.md5sum()
     assert res.name == ss4763.name
     assert res.filename == ss4763.filename
@@ -289,18 +295,19 @@ def test_scaledSearchResult():
     # check that we _can_ get avg_containment_ani
     assert res.cmp.avg_containment_ani == np.mean([queryc_ani.ani, matchc_ani.ani])

+
 def test_numSearchResult():
     # check that values get stored/calculated correctly
-    ss47_file = utils.get_test_data('num/47.fa.sig')
-    ss63_file = utils.get_test_data('num/63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss63 = load_one_signature(ss63_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("num/47.fa.sig")
+    ss63_file = utils.get_test_data("num/63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss63 = load_one_signature(ss63_file, ksize=31, select_moltype="dna")
     ss63 = ss63.to_mutable()
     ss63.filename = ss63_file

     assert ss47.minhash.num and ss63.minhash.num

-    res = SearchResult(ss47, ss63, similarity= ss47.jaccard(ss63))
+    res = SearchResult(ss47, ss63, similarity=ss47.jaccard(ss63))
     print(res.cmp_num)
     assert res.mh1.num
     assert res.cmp.cmp_num == 500
@@ -311,8 +318,8 @@ def test_numSearchResult():
     assert res.query_abundance == ss47.minhash.track_abundance
     assert res.match_abundance == ss63.minhash.track_abundance
     assert res.ksize == 31
-    assert res.moltype == 'DNA'
-    assert res.query_filename == '47.fa'
+    assert res.moltype == "DNA"
+    assert res.query_filename == "47.fa"
     assert res.match_filename == ss63_file
     assert res.query_md5 == ss47.md5sum()
     assert res.match_md5 == ss63.md5sum()
@@ -323,7 +330,7 @@ def test_numSearchResult():
     # check that we can't get ani
     with pytest.raises(TypeError) as exc:
         res.estimate_search_ani()
-    assert("ANI can only be estimated from scaled signatures.") in str(exc)
+    assert ("ANI can only be estimated from scaled signatures.") in str(exc)

     # get result as dictionary (of just items to write)
     resD = res.resultdict
@@ -333,10 +340,10 @@ def test_numSearchResult():


 def test_SearchResult_incompatible_sigs():
-    ss47_file = utils.get_test_data('num/47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("num/47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")

     with pytest.raises(TypeError) as exc:
         SearchResult(ss47, ss4763, similarity=10)
@@ -345,8 +352,8 @@ def test_SearchResult_incompatible_sigs():


 def test_SearchResult_notsigs():
-    ss47_file = utils.get_test_data('num/47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
+    ss47_file = utils.get_test_data("num/47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")

     with pytest.raises(AttributeError) as exc:
         SearchResult(ss47_file, ss4763_file, similarity=10)
@@ -356,10 +363,10 @@ def test_SearchResult_notsigs():

 def test_SearchResult_no_similarity():
     # check that values get stored/calculated correctly
-    ss47_file = utils.get_test_data('47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")

     with pytest.raises(ValueError) as exc:
         SearchResult(ss47, ss4763)
@@ -369,10 +376,10 @@ def test_SearchResult_no_similarity():

 def test_PrefetchResult():
     # check that values get stored/calculated correctly
-    ss47_file = utils.get_test_data('47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")
     ss4763 = ss4763.to_mutable()
     ss4763.filename = ss4763_file
@@ -381,14 +388,14 @@ def test_PrefetchResult():
     intersect_mh = ss47.minhash.intersection(ss4763.minhash)
     intersect_bp = len(intersect_mh) * scaled

-    jaccard=ss4763.jaccard(ss47)
-    max_containment=ss4763.max_containment(ss47)
-    f_match_query=ss47.contained_by(ss4763)
-    f_query_match=ss4763.contained_by(ss47)
+    jaccard = ss4763.jaccard(ss47)
+    max_containment = ss4763.max_containment(ss47)
+    f_match_query = ss47.contained_by(ss4763)
+    f_query_match = ss4763.contained_by(ss47)
     queryc_ani = ss47.containment_ani(ss4763)
     matchc_ani = ss4763.containment_ani(ss47)

-    res = PrefetchResult(ss47, ss4763, cmp_scaled = scaled)
+    res = PrefetchResult(ss47, ss4763, cmp_scaled=scaled)

     assert res.query_name == ss47.name
     assert res.match_name == ss4763.name
@@ -400,8 +407,8 @@ def test_PrefetchResult():
     assert res.query_bp == len(ss47.minhash) * scaled
     assert res.match_bp == len(ss4763.minhash) * scaled
     assert res.ksize == 31
-    assert res.moltype == 'DNA'
-    assert res.query_filename == '47.fa'
+    assert res.moltype == "DNA"
+    assert res.query_filename == "47.fa"
     assert res.match_filename == ss4763_file
     assert res.query_md5 == ss47.md5sum()
     assert res.match_md5 == ss4763.md5sum()
@@ -426,23 +433,26 @@ def test_PrefetchResult():


 def test_PrefetchResult_incompatible_sigs():
-    ss47_file = utils.get_test_data('num/47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("num/47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")

     with pytest.raises(TypeError) as exc:
         PrefetchResult(ss47, ss4763)
     print(str(exc))
-    assert "Error: prefetch and gather results must be between scaled signatures." in str(exc)
+    assert (
+        "Error: prefetch and gather results must be between scaled signatures."
+        in str(exc)
+    )


 def test_GatherResult():
     # check that values get stored/calculated correctly
-    ss47_file = utils.get_test_data('track_abund/47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("track_abund/47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")
     ss4763 = ss4763.to_mutable()
     ss4763.filename = ss4763_file
@@ -454,8 +464,8 @@ def test_GatherResult():
     remaining_mh.remove_many(intersect_mh)
     intersect_bp = len(intersect_mh) * scaled

-    max_containment=ss4763.max_containment(ss47)
-    f_match_query = ss47.contained_by(ss4763)
+    max_containment = ss4763.max_containment(ss47)
+    ss47.contained_by(ss4763)
     orig_query_abunds = ss47.minhash.hashes
     queryc_ani = ss47.containment_ani(ss4763)
     matchc_ani = ss4763.containment_ani(ss47)
@@ -464,12 +474,16 @@ def test_GatherResult():
     gather_result_rank = 1
     sum_abunds = 1000

-    res = GatherResult(ss47, ss4763, cmp_scaled=scaled,
-                       gather_querymh=remaining_mh,
-                       gather_result_rank=gather_result_rank,
-                       total_weighted_hashes = sum_abunds,
-                       orig_query_len=len(ss47.minhash),
-                       orig_query_abunds=orig_query_abunds)
+    res = GatherResult(
+        ss47,
+        ss4763,
+        cmp_scaled=scaled,
+        gather_querymh=remaining_mh,
+        gather_result_rank=gather_result_rank,
+        total_weighted_hashes=sum_abunds,
+        orig_query_len=len(ss47.minhash),
+        orig_query_abunds=orig_query_abunds,
+    )

     assert res.query_name == ss47.name
     assert res.match_name == ss4763.name
@@ -481,8 +495,8 @@ def test_GatherResult():
     assert res.query_bp == len(ss47.minhash) * scaled
     assert res.match_bp == len(ss4763.minhash) * scaled
     assert res.ksize == 31
-    assert res.moltype == 'DNA'
-    assert res.query_filename == 'podar-ref/47.fa'
+    assert res.moltype == "DNA"
+    assert res.query_filename == "podar-ref/47.fa"
     assert res.match_filename == ss4763_file
     assert res.query_md5 == ss47.md5sum()
     assert res.match_md5 == ss4763.md5sum()
@@ -516,10 +530,10 @@ def test_GatherResult():

 def test_GatherResult_ci():
     # check that values get stored/calculated correctly
-    ss47_file = utils.get_test_data('track_abund/47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("track_abund/47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")
     ss4763 = ss4763.to_mutable()
     ss4763.filename = ss4763_file
@@ -531,20 +545,24 @@ def test_GatherResult_ci():
     remaining_mh.remove_many(intersect_mh)

     orig_query_abunds = ss47.minhash.hashes
-    queryc_ani = ss47.containment_ani(ss4763,estimate_ci=True)
+    queryc_ani = ss47.containment_ani(ss4763, estimate_ci=True)
     matchc_ani = ss4763.containment_ani(ss47, estimate_ci=True)

     # make some fake vals to check
     gather_result_rank = 1
     sum_abunds = 1000

-    res = GatherResult(ss47, ss4763, cmp_scaled=scaled,
-                       gather_querymh=remaining_mh,
-                       gather_result_rank=gather_result_rank,
-                       total_weighted_hashes = sum_abunds,
-                       orig_query_len=len(ss47.minhash),
-                       orig_query_abunds=orig_query_abunds,
-                       estimate_ani_ci=True)
+    res = GatherResult(
+        ss47,
+        ss4763,
+        cmp_scaled=scaled,
+        gather_querymh=remaining_mh,
+        gather_result_rank=gather_result_rank,
+        total_weighted_hashes=sum_abunds,
+        orig_query_len=len(ss47.minhash),
+        orig_query_abunds=orig_query_abunds,
+        estimate_ani_ci=True,
+    )

     # check that we can write prefetch result directly from gather
     pf = PrefetchResult(ss47, ss4763, cmp_scaled=scaled, estimate_ani_ci=True)
@@ -568,130 +586,183 @@ def test_GatherResult_ci():


 def test_GatherResult_incompatible_sigs():
-    ss47_file = utils.get_test_data('num/47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("num/47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")

     orig_query_abunds = ss47.minhash.hashes
     with pytest.raises(TypeError) as exc:
-        GatherResult(ss47, ss4763, cmp_scaled=1,
-                     gather_querymh=ss47.minhash,
-                     gather_result_rank=1,
-                     total_weighted_hashes = 1,
-                     orig_query_len=len(ss47.minhash),
-                     orig_query_abunds=orig_query_abunds)
+        GatherResult(
+            ss47,
+            ss4763,
+            cmp_scaled=1,
+            gather_querymh=ss47.minhash,
+            gather_result_rank=1,
+            total_weighted_hashes=1,
+            orig_query_len=len(ss47.minhash),
+            orig_query_abunds=orig_query_abunds,
+        )
     print(str(exc))
-    assert "Error: prefetch and gather results must be between scaled signatures." in str(exc)
+    assert (
+        "Error: prefetch and gather results must be between scaled signatures."
+        in str(exc)
+    )


 def test_GatherResult_incomplete_input_cmpscaled():
-    ss47_file = utils.get_test_data('47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")
     orig_query_abunds = ss47.minhash.hashes

     with pytest.raises(ValueError) as exc:
-        GatherResult(ss47, ss4763, cmp_scaled=None,
-                     gather_querymh=ss47.minhash,
-                     gather_result_rank=1,
-                     total_weighted_hashes = 1,
-                     orig_query_len=len(ss47.minhash),
-                     orig_query_abunds=orig_query_abunds)
+        GatherResult(
+            ss47,
+            ss4763,
+            cmp_scaled=None,
+            gather_querymh=ss47.minhash,
+            gather_result_rank=1,
+            total_weighted_hashes=1,
+            orig_query_len=len(ss47.minhash),
+            orig_query_abunds=orig_query_abunds,
+        )
     print(str(exc))
-    assert "Error: must provide comparison scaled value ('cmp_scaled') for GatherResult" in str(exc)
+    assert (
+        "Error: must provide comparison scaled value ('cmp_scaled') for GatherResult"
+        in str(exc)
+    )


 def test_GatherResult_incomplete_input_gathermh():
-    ss47_file = utils.get_test_data('47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")
     orig_query_abunds = ss47.minhash.hashes

     with pytest.raises(ValueError) as exc:
-        GatherResult(ss47, ss4763, cmp_scaled=1000,
-                     gather_querymh=None,
-                     gather_result_rank=1,
-                     total_weighted_hashes = 1,
-                     orig_query_len=len(ss47.minhash),
-                     orig_query_abunds=orig_query_abunds)
+        GatherResult(
+            ss47,
+            ss4763,
+            cmp_scaled=1000,
+            gather_querymh=None,
+            gather_result_rank=1,
+            total_weighted_hashes=1,
+            orig_query_len=len(ss47.minhash),
+            orig_query_abunds=orig_query_abunds,
+        )
     print(str(exc))
-    assert "Error: must provide current gather sketch (remaining hashes) for GatherResult" in str(exc)
+    assert (
+        "Error: must provide current gather sketch (remaining hashes) for GatherResult"
+        in str(exc)
+    )


 def test_GatherResult_incomplete_input_gather_result_rank():
-    ss47_file = utils.get_test_data('47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")
     orig_query_abunds = ss47.minhash.hashes

     with pytest.raises(ValueError) as exc:
-        GatherResult(ss47, ss4763, cmp_scaled=1000,
-                     gather_querymh=ss47.minhash,
-                     gather_result_rank=None,
-                     total_weighted_hashes = 1,
-                     orig_query_len=len(ss47.minhash),
-                     orig_query_abunds=orig_query_abunds)
+        GatherResult(
+            ss47,
+            ss4763,
+            cmp_scaled=1000,
+            gather_querymh=ss47.minhash,
+            gather_result_rank=None,
+            total_weighted_hashes=1,
+            orig_query_len=len(ss47.minhash),
+            orig_query_abunds=orig_query_abunds,
+        )
     print(str(exc))
     assert "Error: must provide 'gather_result_rank' to GatherResult" in str(exc)


 def test_GatherResult_incomplete_input_total_weighted_hashes():
-    ss47_file = utils.get_test_data('47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")
     orig_query_abunds = ss47.minhash.hashes

     with pytest.raises(ValueError) as exc:
-        GatherResult(ss47, ss4763, cmp_scaled=1000,
-                     gather_querymh=ss47.minhash,
-                     gather_result_rank=1,
-                     total_weighted_hashes = None,
-                     orig_query_len=len(ss47.minhash),
-                     orig_query_abunds=orig_query_abunds)
+        GatherResult(
+            ss47,
+            ss4763,
+            cmp_scaled=1000,
+            gather_querymh=ss47.minhash,
+            gather_result_rank=1,
+            total_weighted_hashes=None,
+            orig_query_len=len(ss47.minhash),
+            orig_query_abunds=orig_query_abunds,
+        )
     print(str(exc))
-    assert "Error: must provide sum of all abundances ('total_weighted_hashes') to GatherResult" in str(exc)
+    assert (
+        "Error: must provide sum of all abundances ('total_weighted_hashes') to GatherResult"
+        in str(exc)
+    )

     with pytest.raises(ValueError) as exc:
-        GatherResult(ss47, ss4763, cmp_scaled=1000,
-                     gather_querymh=ss47.minhash,
-                     gather_result_rank=1,
-                     total_weighted_hashes = 0,
-                     orig_query_len=len(ss47.minhash),
-                     orig_query_abunds=orig_query_abunds)
+        GatherResult(
+            ss47,
+            ss4763,
+            cmp_scaled=1000,
+            gather_querymh=ss47.minhash,
+            gather_result_rank=1,
+            total_weighted_hashes=0,
+            orig_query_len=len(ss47.minhash),
+            orig_query_abunds=orig_query_abunds,
+        )
     print(str(exc))
-    assert "Error: must provide sum of all abundances ('total_weighted_hashes') to GatherResult" in str(exc)
+    assert (
+        "Error: must provide sum of all abundances ('total_weighted_hashes') to GatherResult"
+        in str(exc)
+    )


 def test_GatherResult_incomplete_input_orig_query_abunds():
-    ss47_file = utils.get_test_data('47.fa.sig')
-    ss4763_file = utils.get_test_data('47+63.fa.sig')
-    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype='dna')
-    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype='dna')
+    ss47_file = utils.get_test_data("47.fa.sig")
+    ss4763_file = utils.get_test_data("47+63.fa.sig")
+    ss47 = load_one_signature(ss47_file, ksize=31, select_moltype="dna")
+    ss4763 = load_one_signature(ss4763_file, ksize=31, select_moltype="dna")
     orig_query_abunds = None

     with pytest.raises(ValueError) as exc:
-        GatherResult(ss47, ss4763, cmp_scaled=1000,
-                     gather_querymh=ss47.minhash,
-                     gather_result_rank=1,
-                     total_weighted_hashes = 1,
-                     orig_query_len=len(ss47.minhash),
-                     orig_query_abunds=orig_query_abunds)
+        GatherResult(
+            ss47,
+            ss4763,
+            cmp_scaled=1000,
+            gather_querymh=ss47.minhash,
+            gather_result_rank=1,
+            total_weighted_hashes=1,
+            orig_query_len=len(ss47.minhash),
+            orig_query_abunds=orig_query_abunds,
+        )
     print(str(exc))
-    assert "Error: must provide original query abundances ('orig_query_abunds') to GatherResult" in str(exc)
+    assert (
+        "Error: must provide original query abundances ('orig_query_abunds') to GatherResult"
+        in str(exc)
+    )

     orig_query_abunds = {}
     with pytest.raises(ValueError) as exc:
-        GatherResult(ss47, ss4763, cmp_scaled=1000,
-                     gather_querymh=ss47.minhash,
-                     gather_result_rank=1,
-                     total_weighted_hashes = 1,
-                     orig_query_len=len(ss47.minhash),
-                     orig_query_abunds=orig_query_abunds)
+        GatherResult(
+            ss47,
+            ss4763,
+            cmp_scaled=1000,
+            gather_querymh=ss47.minhash,
+            gather_result_rank=1,
+            total_weighted_hashes=1,
+            orig_query_len=len(ss47.minhash),
+            orig_query_abunds=orig_query_abunds,
+        )
     print(str(exc))
-    assert "Error: must provide original query abundances ('orig_query_abunds') to GatherResult" in str(exc)
+    assert (
+        "Error: must provide original query abundances ('orig_query_abunds') to GatherResult"
+        in str(exc)
+    )
diff --git a/tests/test_signature.py b/tests/test_signature.py
index 95ea058dc4..b82a02364e 100644
--- a/tests/test_signature.py
+++ b/tests/test_signature.py
@@ -3,8 +3,13 @@
 import pytest

 import sourmash
-from sourmash.signature import SourmashSignature, save_signatures, \
-    load_signatures, load_one_signature, FrozenSourmashSignature
+from sourmash.signature import (
+    SourmashSignature,
+    save_signatures,
+    load_signatures,
+    load_one_signature,
+    FrozenSourmashSignature,
+)
 import sourmash_tst_utils as utils
 from sourmash.minhash import MinHash, FrozenMinHash
 from sourmash_tst_utils import SourmashCommandFailed
@@ -13,7 +18,7 @@ def test_minhash_copy(track_abundance):
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     e.add_kmer("AT" * 10)
-    sig = SourmashSignature(e, name='foo')
+    SourmashSignature(e, name="foo")
     f = e.copy()

     assert e == f
@@ -21,7 +26,7 @@ def test_sig_copy(track_abundance):
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     e.add_kmer("AT" * 10)
-    sig1 = SourmashSignature(e, name='foo')
+    sig1 = SourmashSignature(e, name="foo")
     sig2 = sig1.copy()

     assert sig1 == sig2
@@ -29,35 +34,35 @@ def test_sig_copy_frozen(track_abundance):
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     e.add_kmer("AT" * 10)
-    sig1 = SourmashSignature(e, name='foo')
+    sig1 = SourmashSignature(e, name="foo")
     sig2 = sig1.copy()

     assert sig1 == sig2
     with pytest.raises(TypeError) as e:
         sig2.minhash.add_hash(5)
-    assert 'FrozenMinHash does not support modification' in str(e.value)
+    assert "FrozenMinHash does not support modification" in str(e.value)


 def test_sig_copy_frozen_mutable(track_abundance):
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     e.add_kmer("AT" * 10)
-    sig1 = SourmashSignature(e, name='foo')
+    sig1 = SourmashSignature(e, name="foo")
     sig1.minhash = sig1.minhash.to_mutable()
     sig2 = sig1.copy()

     assert sig1 == sig2
     with pytest.raises(TypeError) as e:
         sig2.minhash.add_hash(5)
-    assert 'FrozenMinHash does not support modification' in str(e.value)
+    assert "FrozenMinHash does not support modification" in str(e.value)


 def test_compare(track_abundance):
     # same content, same name -> equal
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     e.add_kmer("AT" * 10)
-    sig1 = SourmashSignature(e, name='foo')
+    SourmashSignature(e, name="foo")

     f = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     f.add_kmer("AT" * 10)
-    sig2 = SourmashSignature(f, name='foo')
+    SourmashSignature(f, name="foo")

     assert e == f
@@ -66,11 +71,11 @@ def test_compare_ne(track_abundance):
     # same content, different names -> different
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     e.add_kmer("AT" * 10)
-    sig1 = SourmashSignature(e, name='foo')
+    sig1 = SourmashSignature(e, name="foo")

     f = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     f.add_kmer("AT" * 10)
-    sig2 = SourmashSignature(f, name='bar')
+    sig2 = SourmashSignature(f, name="bar")

     assert sig1 != sig2
@@ -79,11 +84,11 @@ def test_compare_ne2(track_abundance):
     # same content, different filename -> different
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     e.add_kmer("AT" * 10)
-    sig1 = SourmashSignature(e, name='foo', filename='a')
+    sig1 = SourmashSignature(e, name="foo", filename="a")

     f = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     f.add_kmer("AT" * 10)
-    sig2 = SourmashSignature(f, name='foo', filename='b')
+    sig2 = SourmashSignature(f, name="foo", filename="b")

     assert sig1 != sig2
     assert sig2 != sig1
@@ -93,11 +98,11 @@ def test_compare_ne2_reverse(track_abundance):
     # same content, one has filename, other does not -> different
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     e.add_kmer("AT" * 10)
-    sig1 = SourmashSignature(e, name='foo')
+    sig1 = SourmashSignature(e, name="foo")

     f = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     f.add_kmer("AT" * 10)
-    sig2 = SourmashSignature(f, filename='b')
+    sig2 = SourmashSignature(f, filename="b")

     assert sig2 != sig1
     assert sig1 != sig2
@@ -124,8 +129,8 @@ def test_str(track_abundance):
     print(sig)
     assert repr(sig) == "SourmashSignature('', 59502a74)"

-    sig._name = 'fizbar'
-    assert repr(sig) == 'SourmashSignature(\'fizbar\', 59502a74)'
+    sig._name = "fizbar"
+    assert repr(sig) == "SourmashSignature('fizbar', 59502a74)"


 def test_roundtrip(track_abundance):
@@ -135,7 +140,6 @@ def test_roundtrip(track_abundance):
     s = save_signatures([sig])
     siglist = list(load_signatures(s))
     sig2 = siglist[0]
-    e2 = sig2.minhash

     assert sig.similarity(sig2) == 1.0
     assert sig2.similarity(sig) == 1.0
@@ -164,9 +168,8 @@ def test_load_signature_ksize_nonint(track_abundance):
     e.add_kmer("AT" * 10)
     sig = SourmashSignature(e)
     s = save_signatures([sig])
-    siglist = list(load_signatures(s, ksize='20'))
+    siglist = list(load_signatures(s, ksize="20"))
     sig2 = siglist[0]
-    e2 = sig2.minhash

     assert sig.similarity(sig2) == 1.0
     assert sig2.similarity(sig) == 1.0
@@ -180,15 +183,13 @@ def test_roundtrip_empty(track_abundance):
     s = save_signatures([sig])
     siglist = list(load_signatures(s))
     sig2 = siglist[0]
-    e2 = sig2.minhash

     assert sig.similarity(sig2) == 0
     assert sig2.similarity(sig) == 0


 def test_roundtrip_scaled(track_abundance):
-    e = MinHash(n=0, ksize=20, track_abundance=track_abundance,
-                max_hash=10)
+    e = MinHash(n=0, ksize=20, track_abundance=track_abundance, max_hash=10)
     e.add_hash(5)
     sig = SourmashSignature(e)
     s = save_signatures([sig])
@@ -203,8 +204,7 @@ def test_roundtrip_scaled(track_abundance):

 def test_roundtrip_seed(track_abundance):
-    e = MinHash(n=1, ksize=20, track_abundance=track_abundance,
-                seed=10)
+    e = MinHash(n=1, ksize=20, track_abundance=track_abundance, seed=10)
     e.add_hash(5)
     sig = SourmashSignature(e)
     s = save_signatures([sig])
@@ -219,26 +219,24 @@ def test_roundtrip_seed(track_abundance):

 def test_similarity_downsample(track_abundance):
-    e = MinHash(n=0, ksize=20, track_abundance=track_abundance,
-                max_hash=2**63)
-    f = MinHash(n=0, ksize=20, track_abundance=track_abundance,
-                max_hash=2**2)
+    e = MinHash(n=0, ksize=20, track_abundance=track_abundance, max_hash=2**63)
+    f = MinHash(n=0, ksize=20, track_abundance=track_abundance, max_hash=2**2)

     e.add_hash(1)
     e.add_hash(5)
     assert len(e.hashes) == 2

     f.add_hash(1)
-    f.add_hash(5) # should be discarded due to max_hash
+    f.add_hash(5)  # should be discarded due to max_hash
     assert len(f.hashes) == 1

     ee = SourmashSignature(e)
     ff = SourmashSignature(f)

-    with pytest.raises(ValueError) as e: # mismatch in max_hash
+    with pytest.raises(ValueError) as e:  # mismatch in max_hash
         ee.similarity(ff)

-    assert 'mismatch in scaled; comparison fail' in str(e.value)
+    assert "mismatch in scaled; comparison fail" in str(e.value)

     x = ee.similarity(ff, downsample=True)
     assert round(x, 1) == 1.0
@@ -252,33 +250,32 @@ def test_add_sequence_bad_dna(track_abundance):
     with pytest.raises(ValueError) as e:
         sig.add_sequence("N" * 21, force=False)

-    assert 'invalid DNA character in input k-mer: NNNNNNNNNNNNNNNNNNNNN' in str(e.value)
+    assert "invalid DNA character in input k-mer: NNNNNNNNNNNNNNNNNNNNN" in str(e.value)


 def test_md5(track_abundance):
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     e.add_hash(5)
     sig = SourmashSignature(e)
-    assert sig.md5sum() == 'eae27d77ca20db309e056e3d2dcd7d69', sig.md5sum()
+    assert sig.md5sum() == "eae27d77ca20db309e056e3d2dcd7d69", sig.md5sum()


 def test_str_1(track_abundance):
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
-    sig = SourmashSignature(e, name='foo')
-    assert str(sig) == 'foo'
+    sig = SourmashSignature(e, name="foo")
+    assert str(sig) == "foo"


 def test_str_2(track_abundance):
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
-    sig = SourmashSignature(e, filename='foo.txt')
-    assert str(sig) == 'foo.txt'
+    sig = SourmashSignature(e, filename="foo.txt")
+    assert str(sig) == "foo.txt"


 def test_str_3(track_abundance):
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
-    sig = SourmashSignature(e, name='foo',
-                            filename='foo.txt')
-    assert str(sig) == 'foo'
+    sig = SourmashSignature(e, name="foo", filename="foo.txt")
+    assert str(sig) == "foo"


 def test_name_4(track_abundance):
@@ -300,7 +297,7 @@ def test_save_load_multisig(track_abundance):
     print(x)

     assert len(y) == 2
-    assert sig1 in y # order not guaranteed, note.
+    assert sig1 in y  # order not guaranteed, note.
     assert sig2 in y

     assert sig1 != sig2
@@ -309,7 +306,7 @@ def test_load_one_fail_nosig(track_abundance):
     x = save_signatures([])
     print((x,))
     with pytest.raises(ValueError):
-        y = load_one_signature(x)
+        load_one_signature(x)


 def test_load_one_succeed(track_abundance):
@@ -332,7 +329,7 @@ def test_load_one_fail_multisig(track_abundance):
     x = save_signatures([sig1, sig2])

     with pytest.raises(ValueError):
-        y = load_one_signature(x)
+        load_one_signature(x)


 def test_save_minified(track_abundance):
@@ -343,24 +340,24 @@ def test_save_minified(track_abundance):
     sig2 = SourmashSignature(e2, name="bar baz")

     x = save_signatures([sig1, sig2])
-    assert b'\n' not in x
-    assert len(x.split(b'\n')) == 1
+    assert b"\n" not in x
+    assert len(x.split(b"\n")) == 1

     y = list(load_signatures(x))
     assert len(y) == 2
-    assert any(sig.name == 'foo' for sig in y)
-    assert any(sig.name == 'bar baz' for sig in y)
+    assert any(sig.name == "foo" for sig in y)
+    assert any(sig.name == "bar baz" for sig in y)


 def test_load_minified(track_abundance):
-    sigfile = utils.get_test_data('genome-s10+s11.sig')
+    sigfile = utils.get_test_data("genome-s10+s11.sig")
     sigs = load_signatures(sigfile)

     minified = save_signatures(sigs)
-    with open(sigfile, 'r') as f:
+    with open(sigfile) as f:
         orig_file = f.read()
     assert len(minified) < len(orig_file)
-    assert b'\n' not in minified
+    assert b"\n" not in minified


 def test_load_compressed(track_abundance):
@@ -372,8 +369,8 @@ def test_load_compressed(track_abundance):
     y = load_one_signature(x)
     assert sig1 == y

-    sigfile = utils.get_test_data('genome-s10+s11.sig.gz')
-    sigs = load_signatures(sigfile)
+    sigfile = utils.get_test_data("genome-s10+s11.sig.gz")
+    load_signatures(sigfile)


 def test_binary_fp(tmpdir, track_abundance):
@@ -381,9 +378,9 @@ def test_binary_fp(tmpdir, track_abundance):
     e.add_kmer("AT" * 10)

     path = tmpdir.join("1.sig")
-    with open(str(path), 'wb') as fp:
+    with open(str(path), "wb") as fp:
         sig = SourmashSignature(e)
-        s = save_signatures([sig], fp)
+        save_signatures([sig], fp)


 def test_load_signatures_no_file_do_raise(tmpdir):
@@ -409,10 +406,10 @@ def test_max_containment():
     ss1 = SourmashSignature(mh1)
     ss2 = SourmashSignature(mh2)

-    assert ss1.contained_by(ss2) == 1/4
-    assert ss2.contained_by(ss1) == 1/2
-    assert ss1.max_containment(ss2) == 1/2
-    assert ss2.max_containment(ss1) == 1/2
+    assert ss1.contained_by(ss2) == 1 / 4
+    assert ss2.contained_by(ss1) == 1 / 2
+    assert ss1.max_containment(ss2) == 1 / 2
+    assert ss2.max_containment(ss1) == 1 / 2


 def test_max_containment_empty():
@@ -447,32 +444,44 @@ def test_max_containment_equal():

 def test_containment_ANI():
-    f1 = utils.get_test_data('2.fa.sig')
-    f2 = utils.get_test_data('2+63.fa.sig')
+    f1 = utils.get_test_data("2.fa.sig")
+    f2 = utils.get_test_data("2+63.fa.sig")
     ss1 = sourmash.load_one_signature(f1, ksize=31)
     ss2 = sourmash.load_one_signature(f2, ksize=31)

-    s1_cont_s2 = ss1.containment_ani(ss2, estimate_ci =True)
-    s2_cont_s1 = ss2.containment_ani(ss1, estimate_ci =True)
+    s1_cont_s2 = ss1.containment_ani(ss2, estimate_ci=True)
+    s2_cont_s1 = ss2.containment_ani(ss1, estimate_ci=True)
     print("\nss1 contained by ss2", s1_cont_s2)
     print("ss2 contained by ss1", s2_cont_s1)

-    assert (round(s1_cont_s2.ani,3), s1_cont_s2.ani_low, s1_cont_s2.ani_high) == (1.0,1.0,1.0)
-    assert (round(s2_cont_s1.ani,3), round(s2_cont_s1.ani_low,3), round(s2_cont_s1.ani_high,3)) == (0.966, 0.965, 0.967)
-
-    s1_mc_s2 = ss1.max_containment_ani(ss2, estimate_ci =True)
-    s2_mc_s1 = ss2.max_containment_ani(ss1, estimate_ci =True)
+    assert (round(s1_cont_s2.ani, 3), s1_cont_s2.ani_low, s1_cont_s2.ani_high) == (
+        1.0,
+        1.0,
+        1.0,
+    )
+    assert (
+        round(s2_cont_s1.ani, 3),
+        round(s2_cont_s1.ani_low, 3),
+        round(s2_cont_s1.ani_high, 3),
+    ) == (0.966, 0.965, 0.967)
+
+    s1_mc_s2 = ss1.max_containment_ani(ss2, estimate_ci=True)
+    s2_mc_s1 = ss2.max_containment_ani(ss1, estimate_ci=True)
     print("mh1 max containment", s1_mc_s2)
     print("mh2 max containment", s2_mc_s1)

     s1_mc_s2.size_is_inaccurate = False
     s2_mc_s1.size_is_inaccurate = False

     assert s1_mc_s2 == s2_mc_s1
-    assert (round(s1_mc_s2.ani, 3), round(s1_mc_s2.ani_low, 3), round(s1_mc_s2.ani_high, 3)) == (1.0,1.0,1.0)
+    assert (
+        round(s1_mc_s2.ani, 3),
+        round(s1_mc_s2.ani_low, 3),
+        round(s1_mc_s2.ani_high, 3),
+    ) == (1.0, 1.0, 1.0)


 def test_containment_ANI_precalc_containment():
-    f1 = utils.get_test_data('47+63.fa.sig')
-    f2 = utils.get_test_data('2+63.fa.sig')
+    f1 = utils.get_test_data("47+63.fa.sig")
+    f2 = utils.get_test_data("2+63.fa.sig")
     ss1 = sourmash.load_one_signature(f1, ksize=31)
     ss2 = sourmash.load_one_signature(f2, ksize=31)
     # precalc containments and assert same results
@@ -480,38 +489,53 @@ def test_containment_ANI_precalc_containment():
     s2c = ss2.contained_by(ss1)
     mc = max(s1c, s2c)

-    assert ss1.containment_ani(ss2, estimate_ci=True) == ss1.containment_ani(ss2, containment=s1c, estimate_ci=True)
-    assert ss2.containment_ani(ss1) == ss2.containment_ani(ss1, containment=s2c)
-    assert ss1.max_containment_ani(ss2) == ss2.max_containment_ani(ss1)
-    assert ss1.max_containment_ani(ss2) == ss1.max_containment_ani(ss2, max_containment=mc)
-    assert ss1.max_containment_ani(ss2) == ss2.max_containment_ani(ss1, max_containment=mc)
+    assert ss1.containment_ani(ss2, estimate_ci=True) == ss1.containment_ani(
+        ss2, containment=s1c, estimate_ci=True
+    )
+    assert ss2.containment_ani(ss1) == ss2.containment_ani(ss1, containment=s2c)
+    assert ss1.max_containment_ani(ss2) == ss2.max_containment_ani(ss1)
+    assert ss1.max_containment_ani(ss2) == ss1.max_containment_ani(
+        ss2, max_containment=mc
+    )
+    assert ss1.max_containment_ani(ss2) == ss2.max_containment_ani(
+        ss1, max_containment=mc
+    )


 def test_avg_containment():
-    f1 = utils.get_test_data('47+63.fa.sig')
-    f2 = utils.get_test_data('2+63.fa.sig')
+    f1 = utils.get_test_data("47+63.fa.sig")
+    f2 = utils.get_test_data("2+63.fa.sig")
     ss1 = sourmash.load_one_signature(f1, ksize=31)
     ss2 = sourmash.load_one_signature(f2, ksize=31)

     # check average_containment_ani
     ac_s1 = ss1.avg_containment(ss2)
     ac_s2 = ss2.avg_containment(ss1)
-    assert ac_s1 == ac_s2 == (ss1.contained_by(ss2) + ss2.contained_by(ss1))/2 == 0.6619979467456603
+    assert (
+        ac_s1
+        == ac_s2
+        == (ss1.contained_by(ss2) + ss2.contained_by(ss1)) / 2
+        == 0.6619979467456603
+    )


 def test_avg_containment_ani():
-    f1 = utils.get_test_data('47+63.fa.sig')
-    f2 = utils.get_test_data('2+63.fa.sig')
+    f1 = utils.get_test_data("47+63.fa.sig")
+    f2 = utils.get_test_data("2+63.fa.sig")
     ss1 = sourmash.load_one_signature(f1, ksize=31)
     ss2 = sourmash.load_one_signature(f2, ksize=31)

     # check average_containment_ani
     ac_s1 = ss1.avg_containment_ani(ss2)
     ac_s2 = ss2.avg_containment_ani(ss1)
-    assert ac_s1 == ac_s2 == (ss1.containment_ani(ss2).ani + ss2.containment_ani(ss1).ani)/2
+    assert (
+        ac_s1
+        == ac_s2
+        == (ss1.containment_ani(ss2).ani + ss2.containment_ani(ss1).ani) / 2
+    )


 def test_containment_ANI_downsample():
-    f2 = utils.get_test_data('2+63.fa.sig')
-    f3 = utils.get_test_data('47+63.fa.sig')
+    f2 = utils.get_test_data("2+63.fa.sig")
+    f3 = utils.get_test_data("47+63.fa.sig")
     ss2 = sourmash.load_one_signature(f2, ksize=31)
     ss3 = sourmash.load_one_signature(f3, ksize=31)
     # check that downsampling works properly
@@ -522,8 +546,8 @@ def test_containment_ANI_downsample():
     assert ss2.minhash.scaled != ss3.minhash.scaled
     ds_s3c = ss2.containment_ani(ss3, downsample=True)
     ds_s4c = ss3.containment_ani(ss2, downsample=True)
-    mc_w_ds_1 =  ss2.max_containment_ani(ss3, downsample=True)
-    mc_w_ds_2 =  ss3.max_containment_ani(ss2, downsample=True)
+    mc_w_ds_1 = ss2.max_containment_ani(ss3, downsample=True)
+    mc_w_ds_2 = ss3.max_containment_ani(ss2, downsample=True)

     with pytest.raises(ValueError) as e:
         ss2.containment_ani(ss3)
@@ -538,15 +562,15 @@ def test_containment_ANI_downsample():
     assert ss2.minhash.scaled == ss3.minhash.scaled
     ds_s3c_manual = ss2.containment_ani(ss3)
     ds_s4c_manual = ss3.containment_ani(ss2)
-    ds_mc_manual =  ss2.max_containment_ani(ss3)
+    ds_mc_manual = ss2.max_containment_ani(ss3)
     assert ds_s3c == ds_s3c_manual
     assert ds_s4c == ds_s4c_manual
     assert mc_w_ds_1 == mc_w_ds_2 == ds_mc_manual


 def test_jaccard_ANI():
-    f1 = utils.get_test_data('2.fa.sig')
-    f2 = utils.get_test_data('2+63.fa.sig')
+    f1 = utils.get_test_data("2.fa.sig")
+    f2 = utils.get_test_data("2+63.fa.sig")
     ss1 = sourmash.load_one_signature(f1, ksize=31)
     ss2 = sourmash.load_one_signature(f2)
@@ -556,12 +580,16 @@ def test_jaccard_ANI():
     s2_jani_s1 = ss2.jaccard_ani(ss1)

     assert s1_jani_s2 == s2_jani_s1
-    assert (s1_jani_s2.ani, s1_jani_s2.p_nothing_in_common, s1_jani_s2.jaccard_error) == (0.9783711630110239, 0.0, 3.891666770716877e-07)
+    assert (
+        s1_jani_s2.ani,
+        s1_jani_s2.p_nothing_in_common,
+        s1_jani_s2.jaccard_error,
+    ) == (0.9783711630110239, 0.0, 3.891666770716877e-07)


 def test_jaccard_ANI_untrustworthy():
-    f1 = utils.get_test_data('2.fa.sig')
-    f2 = utils.get_test_data('2+63.fa.sig')
+    f1 = utils.get_test_data("2.fa.sig")
+    f2 = utils.get_test_data("2+63.fa.sig")
     ss1 = sourmash.load_one_signature(f1, ksize=31)
     ss2 = sourmash.load_one_signature(f2)
@@ -572,28 +600,32 @@ def test_jaccard_ANI_untrustworthy():

     # since size is inaccurate on 2.fa.sig, need to override to be able to get ani
     s1_jani_s2.size_is_inaccurate = False

-    assert s1_jani_s2.ani == None
-    assert s1_jani_s2.je_exceeds_threshold==True
+    assert s1_jani_s2.ani is None
+    assert s1_jani_s2.je_exceeds_threshold == True
     assert s1_jani_s2.je_threshold == 1e-7


 def test_jaccard_ANI_precalc_jaccard():
-    f1 = utils.get_test_data('47+63.fa.sig')
-    f2 = utils.get_test_data('2+63.fa.sig')
+    f1 = utils.get_test_data("47+63.fa.sig")
+    f2 = utils.get_test_data("2+63.fa.sig")
     ss1 = sourmash.load_one_signature(f1, ksize=31)
     ss2 = sourmash.load_one_signature(f2)
     # precalc jaccard and assert same result
     jaccard = ss1.jaccard(ss2)
-    print("\nJACCARD_ANI", ss1.jaccard_ani(ss2,jaccard=jaccard))
+    print("\nJACCARD_ANI", ss1.jaccard_ani(ss2, jaccard=jaccard))

-    assert ss1.jaccard_ani(ss2) == ss1.jaccard_ani(ss2, jaccard=jaccard) == ss2.jaccard_ani(ss1, jaccard=jaccard)
+    assert (
+        ss1.jaccard_ani(ss2)
+        == ss1.jaccard_ani(ss2, jaccard=jaccard)
+        == ss2.jaccard_ani(ss1, jaccard=jaccard)
+    )

     wrong_jaccard = jaccard - 0.1
     assert ss1.jaccard_ani(ss2) != ss1.jaccard_ani(ss2, jaccard=wrong_jaccard)


 def test_jaccard_ANI_downsample():
-    f1 = utils.get_test_data('47+63.fa.sig')
-    f2 = utils.get_test_data('2+63.fa.sig')
+    f1 = utils.get_test_data("47+63.fa.sig")
+    f2 = utils.get_test_data("2+63.fa.sig")
     ss1 = sourmash.load_one_signature(f1, ksize=31)
     ss2 = sourmash.load_one_signature(f2)
@@ -619,10 +651,10 @@ def test_frozen_signature_update_1(track_abundance):
     # setting .name should fail on a FrozenSourmashSignature
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     e.add_kmer("AT" * 10)
-    ss = SourmashSignature(e, name='foo').to_frozen()
+    ss = SourmashSignature(e, name="foo").to_frozen()

     with pytest.raises(ValueError):
-        ss.name = 'foo2'
+        ss.name = "foo2"


 def test_frozen_signature_update_2(track_abundance):
@@ -630,7 +662,7 @@ def test_frozen_signature_update_2(track_abundance):
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     e.add_kmer("AT" * 10)
     e2 = e.copy_and_clear()
-    ss = SourmashSignature(e, name='foo').to_frozen()
+    ss = SourmashSignature(e, name="foo").to_frozen()

     with pytest.raises(ValueError):
         ss.minhash = e2
@@ -640,9 +672,9 @@ def test_frozen_signature_update_3(track_abundance):
     # setting .minhash should succeed with update() context manager
     e = MinHash(n=1, ksize=20, track_abundance=track_abundance)
     e.add_kmer("AT" * 10)
-    ss = SourmashSignature(e, name='foo').to_frozen()
+    ss = SourmashSignature(e, name="foo").to_frozen()

     with ss.update() as ss2:
-        ss2.name = 'foo2'
+        ss2.name = "foo2"

-    assert ss2.name == 'foo2'
+    assert ss2.name == "foo2"
diff --git a/tests/test_sketchcomparison.py b/tests/test_sketchcomparison.py
index 30282895fc..5b7e78537d 100644
--- a/tests/test_sketchcomparison.py
+++ b/tests/test_sketchcomparison.py
@@ -11,14 +11,15 @@
 import sourmash_tst_utils as utils


+# can we parameterize scaled too (so don't need separate downsample tests?)
 def test_FracMinHashComparison(track_abundance):
     # build FracMinHash Comparison and check values
     a = MinHash(0, 21, scaled=1, track_abundance=track_abundance)
     b = MinHash(0, 21, scaled=1, track_abundance=track_abundance)

-    a_values = { 1:5, 3:3, 5:2, 8:2}
-    b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 }
+    a_values = {1: 5, 3: 3, 5: 2, 8: 2}
+    b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1}

     if track_abundance:
         a.set_abundances(a_values)
@@ -43,23 +44,45 @@ def test_FracMinHashComparison(track_abundance):
     intersect_mh = a.flatten().intersection(b.flatten())
     assert cmp.intersect_mh == intersect_mh == b.flatten().intersection(a.flatten())
     assert cmp.total_unique_intersect_hashes == 4
-    assert cmp.pass_threshold # default threshold is 0; this should pass
+    assert cmp.pass_threshold  # default threshold is 0; this should pass

     if track_abundance:
-        assert cmp.angular_similarity == a.angular_similarity(b) == b.angular_similarity(a)
-        assert cmp.cosine_similarity == a.angular_similarity(b) == b.angular_similarity(a)
-        assert cmp.weighted_intersection(from_mh=cmp.mh1).hashes == intersect_mh.inflate(a).hashes
-        assert cmp.weighted_intersection(from_mh=cmp.mh2).hashes == intersect_mh.inflate(b).hashes
-        assert cmp.weighted_intersection(from_abundD=a_values).hashes == intersect_mh.inflate(a).hashes
-        assert cmp.weighted_intersection(from_abundD=b_values).hashes == intersect_mh.inflate(b).hashes
+        assert (
+            cmp.angular_similarity == a.angular_similarity(b) == b.angular_similarity(a)
+        )
+        assert (
+            cmp.cosine_similarity == a.angular_similarity(b) == b.angular_similarity(a)
+        )
+        assert (
+            cmp.weighted_intersection(from_mh=cmp.mh1).hashes
+            == intersect_mh.inflate(a).hashes
+        )
+        assert (
+            cmp.weighted_intersection(from_mh=cmp.mh2).hashes
+            == intersect_mh.inflate(b).hashes
+        )
+        assert (
+            cmp.weighted_intersection(from_abundD=a_values).hashes
+            == intersect_mh.inflate(a).hashes
+        )
+        assert (
+            cmp.weighted_intersection(from_abundD=b_values).hashes
+            == intersect_mh.inflate(b).hashes
+        )
     else:
         with pytest.raises(TypeError) as exc:
             cmp.angular_similarity
         print(str(exc))
-        assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc)
+        assert (
+            "Error: Angular (cosine) similarity requires both sketches to track hash abundance."
+            in str(exc)
+        )
         with pytest.raises(TypeError) as exc:
             cmp.cosine_similarity
         print(str(exc))
-        assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc)
+        assert (
+            "Error: Angular (cosine) similarity requires both sketches to track hash abundance."
+            in str(exc)
+        )
         assert cmp.weighted_intersection(from_mh=cmp.mh1).hashes == intersect_mh.hashes
         assert cmp.weighted_intersection(from_mh=cmp.mh2).hashes == intersect_mh.hashes
@@ -69,8 +92,8 @@ def test_FracMinHashComparison_downsample(track_abundance):
     a = MinHash(0, 21, scaled=1, track_abundance=track_abundance)
     b = MinHash(0, 21, scaled=1, track_abundance=track_abundance)

-    a_values = { 1:5, 3:3, 5:2, 8:2}
-    b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 }
+    a_values = {1: 5, 3: 3, 5: 2, 8: 2}
+    b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1}

     if track_abundance:
         a.set_abundances(a_values)
@@ -84,7 +107,7 @@ def test_FracMinHashComparison_downsample(track_abundance):
     ds_b = b.downsample(scaled=cmp_scaled)

     # build FracMinHashComparison
-    cmp = FracMinHashComparison(a, b, cmp_scaled = cmp_scaled)
+    cmp = FracMinHashComparison(a, b, cmp_scaled=cmp_scaled)
     assert cmp.mh1 == a
     assert cmp.mh2 == b
     assert cmp.mh1_cmp == ds_a
@@ -99,27 +122,59 @@ def test_FracMinHashComparison_downsample(track_abundance):
     assert cmp.max_containment == ds_a.max_containment(ds_b)
     assert cmp.jaccard == ds_a.jaccard(ds_b) == ds_b.jaccard(ds_a)
     intersect_mh = ds_a.flatten().intersection(ds_b.flatten())
-    assert cmp.intersect_mh == intersect_mh == ds_b.flatten().intersection(ds_a.flatten())
+    assert (
+        cmp.intersect_mh == intersect_mh == ds_b.flatten().intersection(ds_a.flatten())
+    )
     assert cmp.total_unique_intersect_hashes == 8
-    assert cmp.pass_threshold # default threshold is 0; this should pass
+    assert cmp.pass_threshold  # default threshold is 0; this should pass

     if track_abundance:
-        assert cmp.angular_similarity == ds_a.angular_similarity(ds_b) == ds_b.angular_similarity(ds_a)
-        assert cmp.cosine_similarity == ds_a.angular_similarity(ds_b) == ds_b.angular_similarity(ds_a)
-        assert cmp.weighted_intersection(from_mh=cmp.mh1_cmp).hashes == intersect_mh.inflate(ds_a).hashes
-        assert cmp.weighted_intersection(from_mh=cmp.mh2_cmp).hashes == intersect_mh.inflate(ds_b).hashes
-        assert cmp.weighted_intersection(from_abundD=cmp.mh1_cmp.hashes).hashes == intersect_mh.inflate(ds_a).hashes
-        assert cmp.weighted_intersection(from_abundD=cmp.mh2_cmp.hashes).hashes == intersect_mh.inflate(ds_b).hashes
+        assert (
+            cmp.angular_similarity
+            == ds_a.angular_similarity(ds_b)
+            == ds_b.angular_similarity(ds_a)
+        )
+        assert (
+            cmp.cosine_similarity
+            == ds_a.angular_similarity(ds_b)
+            == ds_b.angular_similarity(ds_a)
+        )
+        assert (
+            cmp.weighted_intersection(from_mh=cmp.mh1_cmp).hashes
+            == intersect_mh.inflate(ds_a).hashes
+        )
+        assert (
+            cmp.weighted_intersection(from_mh=cmp.mh2_cmp).hashes
+            == intersect_mh.inflate(ds_b).hashes
+        )
+        assert (
+            cmp.weighted_intersection(from_abundD=cmp.mh1_cmp.hashes).hashes
+            == intersect_mh.inflate(ds_a).hashes
+        )
+        assert (
+            cmp.weighted_intersection(from_abundD=cmp.mh2_cmp.hashes).hashes
+            == intersect_mh.inflate(ds_b).hashes
+        )
     else:
         with pytest.raises(TypeError) as exc:
cmp.angular_similarity print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." + in str(exc) + ) with pytest.raises(TypeError) as exc: cmp.cosine_similarity print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) - assert cmp.weighted_intersection(from_mh=cmp.mh1_cmp).hashes == intersect_mh.hashes - assert cmp.weighted_intersection(from_mh=cmp.mh2_cmp).hashes == intersect_mh.hashes + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." + in str(exc) + ) + assert ( + cmp.weighted_intersection(from_mh=cmp.mh1_cmp).hashes == intersect_mh.hashes + ) + assert ( + cmp.weighted_intersection(from_mh=cmp.mh2_cmp).hashes == intersect_mh.hashes + ) def test_FracMinHashComparison_autodownsample(track_abundance): @@ -127,8 +182,8 @@ def test_FracMinHashComparison_autodownsample(track_abundance): a = MinHash(0, 21, scaled=1, track_abundance=track_abundance) b = MinHash(0, 21, scaled=2, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -157,27 +212,59 @@ def test_FracMinHashComparison_autodownsample(track_abundance): assert cmp.max_containment == ds_a.max_containment(ds_b) assert cmp.jaccard == ds_a.jaccard(ds_b) == ds_b.jaccard(ds_a) intersect_mh = ds_a.flatten().intersection(ds_b.flatten()) - assert cmp.intersect_mh == intersect_mh == ds_b.flatten().intersection(ds_a.flatten()) + assert ( + cmp.intersect_mh == intersect_mh == ds_b.flatten().intersection(ds_a.flatten()) + ) assert cmp.total_unique_intersect_hashes == 8 - assert cmp.pass_threshold # default threshold is 0; this should pass + assert cmp.pass_threshold # default threshold is 0; this should pass if track_abundance: - assert cmp.angular_similarity == ds_a.angular_similarity(ds_b) == ds_b.angular_similarity(ds_a) - assert cmp.cosine_similarity == ds_a.angular_similarity(ds_b) == ds_b.angular_similarity(ds_a) - assert cmp.weighted_intersection(from_mh=cmp.mh1_cmp).hashes == intersect_mh.inflate(ds_a).hashes - assert cmp.weighted_intersection(from_mh=cmp.mh2_cmp).hashes == intersect_mh.inflate(ds_b).hashes - assert cmp.weighted_intersection(from_abundD=a_values).hashes == intersect_mh.inflate(a).hashes - assert cmp.weighted_intersection(from_abundD=b_values).hashes == intersect_mh.inflate(b).hashes + assert ( + cmp.angular_similarity + == ds_a.angular_similarity(ds_b) + == ds_b.angular_similarity(ds_a) + ) + assert ( + cmp.cosine_similarity + == ds_a.angular_similarity(ds_b) + == ds_b.angular_similarity(ds_a) + ) + assert ( + cmp.weighted_intersection(from_mh=cmp.mh1_cmp).hashes + == intersect_mh.inflate(ds_a).hashes + ) + assert ( + cmp.weighted_intersection(from_mh=cmp.mh2_cmp).hashes + == intersect_mh.inflate(ds_b).hashes + ) + assert ( + cmp.weighted_intersection(from_abundD=a_values).hashes + == intersect_mh.inflate(a).hashes + ) + assert ( + cmp.weighted_intersection(from_abundD=b_values).hashes + == intersect_mh.inflate(b).hashes + ) else: with pytest.raises(TypeError) as exc: cmp.angular_similarity print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." 
in str(exc) + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." + in str(exc) + ) with pytest.raises(TypeError) as exc: cmp.cosine_similarity print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) - assert cmp.weighted_intersection(from_mh=cmp.mh1_cmp).hashes == intersect_mh.hashes - assert cmp.weighted_intersection(from_mh=cmp.mh2_cmp).hashes == intersect_mh.hashes + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." + in str(exc) + ) + assert ( + cmp.weighted_intersection(from_mh=cmp.mh1_cmp).hashes == intersect_mh.hashes + ) + assert ( + cmp.weighted_intersection(from_mh=cmp.mh2_cmp).hashes == intersect_mh.hashes + ) def test_FracMinHashComparison_ignore_abundance(track_abundance): @@ -185,9 +272,8 @@ def test_FracMinHashComparison_ignore_abundance(track_abundance): a = MinHash(0, 21, scaled=1, track_abundance=track_abundance) b = MinHash(0, 21, scaled=1, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } - intersection_w_abund = {1:8, 3:5, 5:3, 8:3} + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -201,7 +287,7 @@ def test_FracMinHashComparison_ignore_abundance(track_abundance): ds_b = b.flatten().downsample(scaled=cmp_scaled) # build FracMinHashComparison - cmp = FracMinHashComparison(a, b, cmp_scaled = cmp_scaled, ignore_abundance=True) + cmp = FracMinHashComparison(a, b, cmp_scaled=cmp_scaled, ignore_abundance=True) assert cmp.mh1 == a assert cmp.mh2 == b assert cmp.mh1_cmp == ds_a @@ -216,18 +302,26 @@ def test_FracMinHashComparison_ignore_abundance(track_abundance): assert cmp.max_containment == ds_a.max_containment(ds_b) assert cmp.jaccard == ds_a.jaccard(ds_b) == ds_b.jaccard(ds_a) intersect_mh = ds_a.flatten().intersection(ds_b.flatten()) - assert cmp.intersect_mh == intersect_mh == ds_b.flatten().intersection(ds_a.flatten()) + assert ( + cmp.intersect_mh == intersect_mh == ds_b.flatten().intersection(ds_a.flatten()) + ) assert cmp.total_unique_intersect_hashes == 8 - assert cmp.pass_threshold # default threshold is 0; this should pass + assert cmp.pass_threshold # default threshold is 0; this should pass # with ignore_abundance = True, all of these should not be usable. Do we want errors, or ""/None? with pytest.raises(TypeError) as exc: cmp.angular_similarity print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." + in str(exc) + ) with pytest.raises(TypeError) as exc: cmp.cosine_similarity print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." 
+ in str(exc) + ) assert not cmp.mh1_cmp.track_abundance assert not cmp.mh2_cmp.track_abundance assert cmp.weighted_intersection(from_mh=cmp.mh1_cmp).hashes == intersect_mh.hashes @@ -239,8 +333,8 @@ def test_FracMinHashComparison_fail_threshold(track_abundance): a = MinHash(0, 21, scaled=1, track_abundance=track_abundance) b = MinHash(0, 21, scaled=1, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -254,7 +348,7 @@ def test_FracMinHashComparison_fail_threshold(track_abundance): ds_b = b.flatten().downsample(scaled=cmp_scaled) # build FracMinHashComparison - cmp = FracMinHashComparison(a, b, cmp_scaled = cmp_scaled, threshold_bp=40) + cmp = FracMinHashComparison(a, b, cmp_scaled=cmp_scaled, threshold_bp=40) assert cmp.mh1 == a assert cmp.mh2 == b assert cmp.ignore_abundance == False @@ -267,15 +361,19 @@ def test_FracMinHashComparison_fail_threshold(track_abundance): assert cmp.max_containment == ds_a.max_containment(ds_b) assert cmp.jaccard == a.jaccard(b) == b.jaccard(a) intersect_mh = ds_a.flatten().intersection(ds_b.flatten()) - assert cmp.intersect_mh == intersect_mh == ds_b.flatten().intersection(ds_a.flatten()) + assert ( + cmp.intersect_mh == intersect_mh == ds_b.flatten().intersection(ds_a.flatten()) + ) assert cmp.total_unique_intersect_hashes == 8 - assert not cmp.pass_threshold # threshold is 40; this should fail + assert not cmp.pass_threshold # threshold is 40; this should fail def test_FracMinHashComparison_potential_false_negative(): - f1 = utils.get_test_data('scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz') - f2 = utils.get_test_data('scaled100/GCF_000006945.1_ASM694v1_genomic.fna.gz.sig.gz') - f3 = utils.get_test_data('scaled100/GCF_000783305.1_ASM78330v1_genomic.fna.gz.sig.gz') + f1 = utils.get_test_data("scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz") + f2 = utils.get_test_data("scaled100/GCF_000006945.1_ASM694v1_genomic.fna.gz.sig.gz") + f3 = utils.get_test_data( + "scaled100/GCF_000783305.1_ASM78330v1_genomic.fna.gz.sig.gz" + ) a = load_one_signature(f1, ksize=21).minhash b = load_one_signature(f2).minhash c = load_one_signature(f3).minhash @@ -289,9 +387,17 @@ def test_FracMinHashComparison_potential_false_negative(): cmp.estimate_jaccard_ani() assert cmp.jaccard_ani == a.jaccard_ani(b).ani == b.jaccard_ani(a).ani print(cmp.jaccard_ani) - assert cmp.potential_false_negative == a.jaccard_ani(b).p_exceeds_threshold == b.jaccard_ani(a).p_exceeds_threshold + assert ( + cmp.potential_false_negative + == a.jaccard_ani(b).p_exceeds_threshold + == b.jaccard_ani(a).p_exceeds_threshold + ) assert cmp.potential_false_negative == False - assert cmp.jaccard_ani_untrustworthy == a.jaccard_ani(b).je_exceeds_threshold == b.jaccard_ani(a).je_exceeds_threshold + assert ( + cmp.jaccard_ani_untrustworthy + == a.jaccard_ani(b).je_exceeds_threshold + == b.jaccard_ani(a).je_exceeds_threshold + ) cmp.estimate_ani_from_mh1_containment_in_mh2() a_cont_ani_manual = a.containment_ani(b) @@ -308,12 +414,18 @@ def test_FracMinHashComparison_potential_false_negative(): cmp.estimate_max_containment_ani() mc_ani_manual = a.max_containment_ani(b) - assert cmp.max_containment_ani == max(a.containment_ani(b).ani, b.containment_ani(a).ani) == mc_ani_manual.ani + assert ( + cmp.max_containment_ani + == max(a.containment_ani(b).ani, b.containment_ani(a).ani) + == mc_ani_manual.ani 
+ ) assert cmp.potential_false_negative == mc_ani_manual.p_exceeds_threshold - assert cmp.avg_containment_ani == np.mean([a.containment_ani(b).ani, b.containment_ani(a).ani]) + assert cmp.avg_containment_ani == np.mean( + [a.containment_ani(b).ani, b.containment_ani(a).ani] + ) assert cmp.potential_false_negative == False - #downsample to where it becomes a potential false negative + # downsample to where it becomes a potential false negative cmp = FracMinHashComparison(a, b, cmp_scaled=16000) cmp.estimate_ani_from_mh1_containment_in_mh2() assert cmp.potential_false_negative == True @@ -323,8 +435,8 @@ def test_FracMinHashComparison_incompatible_ksize(track_abundance): a = MinHash(0, 31, scaled=1, track_abundance=track_abundance) b = MinHash(0, 21, scaled=2, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -343,8 +455,8 @@ def test_FracMinHashComparison_incompatible_moltype(track_abundance): a = MinHash(0, 31, scaled=1, track_abundance=track_abundance) b = MinHash(0, 31, scaled=2, is_protein=True, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -363,8 +475,8 @@ def test_FracMinHashComparison_incompatible_sketchtype(track_abundance): a = MinHash(0, 31, scaled=1, track_abundance=track_abundance) b = MinHash(10, 31, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -384,8 +496,8 @@ def test_FracMinHashComparison_incompatible_cmp_scaled(track_abundance): a = MinHash(0, 31, scaled=1, track_abundance=track_abundance) b = MinHash(0, 31, scaled=10, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -395,7 +507,7 @@ def test_FracMinHashComparison_incompatible_cmp_scaled(track_abundance): b.add_many(b_values.keys()) with pytest.raises(ValueError) as exc: - FracMinHashComparison(a, b, cmp_scaled = 1) + FracMinHashComparison(a, b, cmp_scaled=1) print(str(exc)) assert "new scaled 1 is lower than current sample scaled 10" in str(exc) @@ -404,8 +516,8 @@ def test_FracMinHashComparison_redownsample_without_scaled(track_abundance): a = MinHash(0, 31, scaled=1, track_abundance=track_abundance) b = MinHash(0, 31, scaled=10, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -429,8 +541,8 @@ def test_NumMinHashComparison(track_abundance): a = MinHash(10, 21, scaled=0, track_abundance=track_abundance) b = MinHash(10, 21, scaled=0, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -453,17 +565,27 @@ def 
test_NumMinHashComparison(track_abundance): intersect_mh = a.flatten().intersection(b.flatten()) assert cmp.intersect_mh == intersect_mh == b.flatten().intersection(a.flatten()) if track_abundance: - assert cmp.angular_similarity == a.angular_similarity(b) == b.angular_similarity(a) - assert cmp.cosine_similarity == a.angular_similarity(b) == b.angular_similarity(a) + assert ( + cmp.angular_similarity == a.angular_similarity(b) == b.angular_similarity(a) + ) + assert ( + cmp.cosine_similarity == a.angular_similarity(b) == b.angular_similarity(a) + ) else: with pytest.raises(TypeError) as exc: cmp.angular_similarity print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." + in str(exc) + ) with pytest.raises(TypeError) as exc: cmp.cosine_similarity print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." + in str(exc) + ) def test_NumMinHashComparison_downsample(track_abundance): @@ -471,8 +593,8 @@ def test_NumMinHashComparison_downsample(track_abundance): a = MinHash(10, 21, scaled=0, track_abundance=track_abundance) b = MinHash(10, 21, scaled=0, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -487,7 +609,7 @@ def test_NumMinHashComparison_downsample(track_abundance): ds_a = a.downsample(num=cmp_num) ds_b = b.downsample(num=cmp_num) # build NumMinHashComparison - cmp = NumMinHashComparison(a, b, cmp_num = cmp_num) + cmp = NumMinHashComparison(a, b, cmp_num=cmp_num) assert cmp.mh1 == a assert cmp.mh2 == b assert cmp.ignore_abundance == False @@ -496,19 +618,35 @@ def test_NumMinHashComparison_downsample(track_abundance): assert cmp.moltype == "DNA" assert cmp.jaccard == ds_a.jaccard(ds_b) == ds_b.jaccard(ds_a) intersect_mh = ds_a.flatten().intersection(ds_b.flatten()) - assert cmp.intersect_mh == intersect_mh == ds_b.flatten().intersection(ds_a.flatten()) + assert ( + cmp.intersect_mh == intersect_mh == ds_b.flatten().intersection(ds_a.flatten()) + ) if track_abundance: - assert cmp.angular_similarity == ds_a.angular_similarity(ds_b) == ds_b.angular_similarity(ds_a) - assert cmp.cosine_similarity == ds_a.angular_similarity(ds_b) == ds_b.angular_similarity(ds_a) + assert ( + cmp.angular_similarity + == ds_a.angular_similarity(ds_b) + == ds_b.angular_similarity(ds_a) + ) + assert ( + cmp.cosine_similarity + == ds_a.angular_similarity(ds_b) + == ds_b.angular_similarity(ds_a) + ) else: with pytest.raises(TypeError) as exc: cmp.angular_similarity print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." + in str(exc) + ) with pytest.raises(TypeError) as exc: cmp.cosine_similarity print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." 
+ in str(exc) + ) def test_NumMinHashComparison_autodownsample(track_abundance): @@ -516,8 +654,8 @@ def test_NumMinHashComparison_autodownsample(track_abundance): a = MinHash(10, 21, scaled=0, track_abundance=track_abundance) b = MinHash(5, 21, scaled=0, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -541,27 +679,43 @@ def test_NumMinHashComparison_autodownsample(track_abundance): assert cmp.moltype == "DNA" assert cmp.jaccard == ds_a.jaccard(ds_b) == ds_b.jaccard(ds_a) intersect_mh = ds_a.flatten().intersection(ds_b.flatten()) - assert cmp.intersect_mh == intersect_mh == ds_b.flatten().intersection(ds_a.flatten()) + assert ( + cmp.intersect_mh == intersect_mh == ds_b.flatten().intersection(ds_a.flatten()) + ) if track_abundance: - assert cmp.angular_similarity == ds_a.angular_similarity(ds_b) == ds_b.angular_similarity(ds_a) - assert cmp.cosine_similarity == ds_a.angular_similarity(ds_b) == ds_b.angular_similarity(ds_a) + assert ( + cmp.angular_similarity + == ds_a.angular_similarity(ds_b) + == ds_b.angular_similarity(ds_a) + ) + assert ( + cmp.cosine_similarity + == ds_a.angular_similarity(ds_b) + == ds_b.angular_similarity(ds_a) + ) else: with pytest.raises(TypeError) as exc: cmp.angular_similarity print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." + in str(exc) + ) with pytest.raises(TypeError) as exc: cmp.cosine_similarity print(str(exc)) - assert "Error: Angular (cosine) similarity requires both sketches to track hash abundance." in str(exc) + assert ( + "Error: Angular (cosine) similarity requires both sketches to track hash abundance." 
+ in str(exc) + ) def test_NumMinHashComparison_incompatible_ksize(track_abundance): a_num = MinHash(20, 31, track_abundance=track_abundance) b_num = MinHash(10, 21, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a_num.set_abundances(a_values) @@ -581,8 +735,8 @@ def test_NumMinHashComparison_incompatible_moltype(track_abundance): a_num = MinHash(20, 31, track_abundance=track_abundance) b_num = MinHash(10, 31, is_protein=True, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a_num.set_abundances(a_values) @@ -601,8 +755,8 @@ def test_NumMinHashComparison_incompatible_sketchtype(track_abundance): a = MinHash(0, 31, scaled=1, track_abundance=track_abundance) b = MinHash(10, 31, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -621,8 +775,8 @@ def test_NumMinHashComparison_redownsample_without_num(track_abundance): a = MinHash(10, 31, track_abundance=track_abundance) b = MinHash(5, 31, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -645,8 +799,8 @@ def test_NumMinHashComparison_incompatible_cmp_num(track_abundance): a = MinHash(200, 31, track_abundance=track_abundance) b = MinHash(100, 31, track_abundance=track_abundance) - a_values = { 1:5, 3:3, 5:2, 8:2} - b_values = { 1:3, 3:2, 5:1, 6:1, 8:1, 10:1 } + a_values = {1: 5, 3: 3, 5: 2, 8: 2} + b_values = {1: 3, 3: 2, 5: 1, 6: 1, 8: 1, 10: 1} if track_abundance: a.set_abundances(a_values) @@ -656,7 +810,7 @@ def test_NumMinHashComparison_incompatible_cmp_num(track_abundance): b.add_many(b_values.keys()) with pytest.raises(ValueError) as exc: - NumMinHashComparison(a, b, cmp_num = 150) + NumMinHashComparison(a, b, cmp_num=150) print(str(exc)) assert "new sample num is higher than current sample num" in str(exc) @@ -664,11 +818,11 @@ def test_NumMinHashComparison_incompatible_cmp_num(track_abundance): def test_FracMinHashComparison_ANI(track_abundance): # need real mh here, small test data fails if track_abundance: - f1 = utils.get_test_data('track_abund/47.fa.sig') - f2 = utils.get_test_data('track_abund/63.fa.sig') + f1 = utils.get_test_data("track_abund/47.fa.sig") + f2 = utils.get_test_data("track_abund/63.fa.sig") else: - f1 = utils.get_test_data('47.fa.sig') - f2 = utils.get_test_data('63.fa.sig') + f1 = utils.get_test_data("47.fa.sig") + f2 = utils.get_test_data("63.fa.sig") a = load_one_signature(f1, ksize=31).minhash b = load_one_signature(f2, ksize=31).minhash @@ -677,40 +831,54 @@ def test_FracMinHashComparison_ANI(track_abundance): # check jaccard ani cmp.estimate_jaccard_ani() assert cmp.jaccard_ani == a.jaccard_ani(b).ani == b.jaccard_ani(a).ani - assert cmp.potential_false_negative == a.jaccard_ani(b).p_exceeds_threshold == b.jaccard_ani(a).p_exceeds_threshold - assert cmp.jaccard_ani_untrustworthy == a.jaccard_ani(b).je_exceeds_threshold == b.jaccard_ani(a).je_exceeds_threshold + 
assert ( + cmp.potential_false_negative + == a.jaccard_ani(b).p_exceeds_threshold + == b.jaccard_ani(a).p_exceeds_threshold + ) + assert ( + cmp.jaccard_ani_untrustworthy + == a.jaccard_ani(b).je_exceeds_threshold + == b.jaccard_ani(a).je_exceeds_threshold + ) cmp.estimate_ani_from_mh1_containment_in_mh2() a_cont_ani_manual = a.containment_ani(b) assert cmp.ani_from_mh1_containment_in_mh2 == a_cont_ani_manual.ani assert cmp.potential_false_negative == a_cont_ani_manual.p_exceeds_threshold -# assert cmp.mh1_containment_ani_low is None -# assert cmp.mh1_containment_ani_high is None + # assert cmp.mh1_containment_ani_low is None + # assert cmp.mh1_containment_ani_high is None cmp.estimate_ani_from_mh2_containment_in_mh1() b_cont_ani_manual = b.containment_ani(a) assert cmp.ani_from_mh2_containment_in_mh1 == b_cont_ani_manual.ani assert cmp.potential_false_negative == b_cont_ani_manual.p_exceeds_threshold -# assert cmp.mh2_containment_ani_low is None -# assert cmp.mh2_containment_ani_high is None + # assert cmp.mh2_containment_ani_low is None + # assert cmp.mh2_containment_ani_high is None cmp.estimate_max_containment_ani() mc_ani_manual = a.max_containment_ani(b) - assert cmp.max_containment_ani == max(a.containment_ani(b).ani, b.containment_ani(a).ani) == mc_ani_manual.ani + assert ( + cmp.max_containment_ani + == max(a.containment_ani(b).ani, b.containment_ani(a).ani) + == mc_ani_manual.ani + ) assert cmp.potential_false_negative == mc_ani_manual.p_exceeds_threshold -# assert cmp.max_containment_ani_low is None -# assert cmp.max_containment_ani_high is None - assert cmp.avg_containment_ani == np.mean([a.containment_ani(b).ani, b.containment_ani(a).ani]) + # assert cmp.max_containment_ani_low is None + # assert cmp.max_containment_ani_high is None + assert cmp.avg_containment_ani == np.mean( + [a.containment_ani(b).ani, b.containment_ani(a).ani] + ) def test_FracMinHashComparison_ANI_provide_similarity(track_abundance): # need real mh here, small test data fails if track_abundance: - f1 = utils.get_test_data('track_abund/47.fa.sig') - f2 = utils.get_test_data('track_abund/63.fa.sig') + f1 = utils.get_test_data("track_abund/47.fa.sig") + f2 = utils.get_test_data("track_abund/63.fa.sig") else: - f1 = utils.get_test_data('47.fa.sig') - f2 = utils.get_test_data('63.fa.sig') + f1 = utils.get_test_data("47.fa.sig") + f2 = utils.get_test_data("63.fa.sig") a = load_one_signature(f1, ksize=31).minhash b = load_one_signature(f2, ksize=31).minhash @@ -720,8 +888,16 @@ def test_FracMinHashComparison_ANI_provide_similarity(track_abundance): jaccard = a.jaccard(b) cmp.estimate_jaccard_ani(jaccard=jaccard) assert cmp.jaccard_ani == a.jaccard_ani(b).ani == b.jaccard_ani(a).ani - assert cmp.potential_false_negative == a.jaccard_ani(b).p_exceeds_threshold == b.jaccard_ani(a).p_exceeds_threshold - assert cmp.jaccard_ani_untrustworthy == a.jaccard_ani(b).je_exceeds_threshold == b.jaccard_ani(a).je_exceeds_threshold + assert ( + cmp.potential_false_negative + == a.jaccard_ani(b).p_exceeds_threshold + == b.jaccard_ani(a).p_exceeds_threshold + ) + assert ( + cmp.jaccard_ani_untrustworthy + == a.jaccard_ani(b).je_exceeds_threshold + == b.jaccard_ani(a).je_exceeds_threshold + ) a_cont = a.contained_by(b) b_cont = b.contained_by(a) @@ -739,19 +915,25 @@ def test_FracMinHashComparison_ANI_provide_similarity(track_abundance): cmp.estimate_max_containment_ani(max_containment=mc) mc_ani_manual = a.max_containment_ani(b) - assert cmp.max_containment_ani == max(a.containment_ani(b).ani, b.containment_ani(a).ani) == 
mc_ani_manual.ani + assert ( + cmp.max_containment_ani + == max(a.containment_ani(b).ani, b.containment_ani(a).ani) + == mc_ani_manual.ani + ) assert cmp.potential_false_negative == mc_ani_manual.p_exceeds_threshold - assert cmp.avg_containment_ani == np.mean([a.containment_ani(b).ani, b.containment_ani(a).ani]) + assert cmp.avg_containment_ani == np.mean( + [a.containment_ani(b).ani, b.containment_ani(a).ani] + ) def test_FracMinHashComparison_ANI_estimate_CI(track_abundance): # need real mh here, small test data fails if track_abundance: - f1 = utils.get_test_data('track_abund/47.fa.sig') - f2 = utils.get_test_data('track_abund/63.fa.sig') + f1 = utils.get_test_data("track_abund/47.fa.sig") + f2 = utils.get_test_data("track_abund/63.fa.sig") else: - f1 = utils.get_test_data('47.fa.sig') - f2 = utils.get_test_data('63.fa.sig') + f1 = utils.get_test_data("47.fa.sig") + f2 = utils.get_test_data("63.fa.sig") a = load_one_signature(f1, ksize=31).minhash b = load_one_signature(f2, ksize=31).minhash @@ -759,8 +941,16 @@ def test_FracMinHashComparison_ANI_estimate_CI(track_abundance): cmp = FracMinHashComparison(a, b, estimate_ani_ci=True) cmp.estimate_jaccard_ani() assert cmp.jaccard_ani == a.jaccard_ani(b).ani == b.jaccard_ani(a).ani - assert cmp.potential_false_negative == a.jaccard_ani(b).p_exceeds_threshold == b.jaccard_ani(a).p_exceeds_threshold - assert cmp.jaccard_ani_untrustworthy == a.jaccard_ani(b).je_exceeds_threshold == b.jaccard_ani(a).je_exceeds_threshold + assert ( + cmp.potential_false_negative + == a.jaccard_ani(b).p_exceeds_threshold + == b.jaccard_ani(a).p_exceeds_threshold + ) + assert ( + cmp.jaccard_ani_untrustworthy + == a.jaccard_ani(b).je_exceeds_threshold + == b.jaccard_ani(a).je_exceeds_threshold + ) cmp.estimate_ani_from_mh1_containment_in_mh2() a_cont_ani_manual = a.containment_ani(b, estimate_ci=True) @@ -778,20 +968,24 @@ def test_FracMinHashComparison_ANI_estimate_CI(track_abundance): cmp.estimate_max_containment_ani() mc_ani_manual = a.max_containment_ani(b, estimate_ci=True) - assert cmp.max_containment_ani == max(a.containment_ani(b).ani, b.containment_ani(a).ani) == mc_ani_manual.ani + assert ( + cmp.max_containment_ani + == max(a.containment_ani(b).ani, b.containment_ani(a).ani) + == mc_ani_manual.ani + ) assert cmp.potential_false_negative == mc_ani_manual.p_exceeds_threshold assert cmp.max_containment_ani_low == mc_ani_manual.ani_low - assert cmp.max_containment_ani_high ==mc_ani_manual.ani_high + assert cmp.max_containment_ani_high == mc_ani_manual.ani_high def test_FracMinHashComparison_ANI_estimate_CI_ci99(track_abundance): # need real mh here, small test data fails if track_abundance: - f1 = utils.get_test_data('track_abund/47.fa.sig') - f2 = utils.get_test_data('track_abund/63.fa.sig') + f1 = utils.get_test_data("track_abund/47.fa.sig") + f2 = utils.get_test_data("track_abund/63.fa.sig") else: - f1 = utils.get_test_data('47.fa.sig') - f2 = utils.get_test_data('63.fa.sig') + f1 = utils.get_test_data("47.fa.sig") + f2 = utils.get_test_data("63.fa.sig") a = load_one_signature(f1, ksize=31).minhash b = load_one_signature(f2, ksize=31).minhash @@ -814,20 +1008,24 @@ def test_FracMinHashComparison_ANI_estimate_CI_ci99(track_abundance): cmp.estimate_max_containment_ani() mc_ani_manual = a.max_containment_ani(b, estimate_ci=True, confidence=0.99) - assert cmp.max_containment_ani == max(a.containment_ani(b).ani, b.containment_ani(a).ani) == mc_ani_manual.ani + assert ( + cmp.max_containment_ani + == max(a.containment_ani(b).ani, b.containment_ani(a).ani) + == 
mc_ani_manual.ani + ) assert cmp.potential_false_negative == mc_ani_manual.p_exceeds_threshold assert cmp.max_containment_ani_low == mc_ani_manual.ani_low - assert cmp.max_containment_ani_high ==mc_ani_manual.ani_high + assert cmp.max_containment_ani_high == mc_ani_manual.ani_high def test_FracMinHashComparison_ANI_downsample(track_abundance): # need real mh here, small test data fails if track_abundance: - f1 = utils.get_test_data('track_abund/47.fa.sig') - f2 = utils.get_test_data('track_abund/63.fa.sig') + f1 = utils.get_test_data("track_abund/47.fa.sig") + f2 = utils.get_test_data("track_abund/63.fa.sig") else: - f1 = utils.get_test_data('47.fa.sig') - f2 = utils.get_test_data('63.fa.sig') + f1 = utils.get_test_data("47.fa.sig") + f2 = utils.get_test_data("63.fa.sig") a = load_one_signature(f1, ksize=31).minhash b = load_one_signature(f2, ksize=31).minhash @@ -841,8 +1039,16 @@ def test_FracMinHashComparison_ANI_downsample(track_abundance): # check jaccard ani cmp.estimate_jaccard_ani() assert cmp.jaccard_ani == a.jaccard_ani(b).ani == b.jaccard_ani(a).ani - assert cmp.potential_false_negative == a.jaccard_ani(b).p_exceeds_threshold == b.jaccard_ani(a).p_exceeds_threshold - assert cmp.jaccard_ani_untrustworthy == a.jaccard_ani(b).je_exceeds_threshold == b.jaccard_ani(a).je_exceeds_threshold + assert ( + cmp.potential_false_negative + == a.jaccard_ani(b).p_exceeds_threshold + == b.jaccard_ani(a).p_exceeds_threshold + ) + assert ( + cmp.jaccard_ani_untrustworthy + == a.jaccard_ani(b).je_exceeds_threshold + == b.jaccard_ani(a).je_exceeds_threshold + ) cmp.estimate_ani_from_mh1_containment_in_mh2() a_cont_ani_manual = a.containment_ani(b, estimate_ci=True) @@ -860,7 +1066,11 @@ def test_FracMinHashComparison_ANI_downsample(track_abundance): cmp.estimate_max_containment_ani() mc_ani_manual = a.max_containment_ani(b, estimate_ci=True) - assert cmp.max_containment_ani == max(a.containment_ani(b).ani, b.containment_ani(a).ani) == mc_ani_manual.ani + assert ( + cmp.max_containment_ani + == max(a.containment_ani(b).ani, b.containment_ani(a).ani) + == mc_ani_manual.ani + ) assert cmp.potential_false_negative == mc_ani_manual.p_exceeds_threshold assert cmp.max_containment_ani_low == mc_ani_manual.ani_low - assert cmp.max_containment_ani_high ==mc_ani_manual.ani_high + assert cmp.max_containment_ani_high == mc_ani_manual.ani_high diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py index 9ee703f6f7..7aaac0446e 100644 --- a/tests/test_sourmash.py +++ b/tests/test_sourmash.py @@ -26,7 +26,8 @@ try: import matplotlib - matplotlib.use('Agg') + + matplotlib.use("Agg") except ImportError: pass @@ -40,30 +41,30 @@ def test_citation_file(): import yaml thisdir = os.path.dirname(__file__) - citation_file = os.path.join(thisdir, '../CITATION.cff') + citation_file = os.path.join(thisdir, "../CITATION.cff") with open(citation_file) as fp: x = yaml.safe_load(fp) - assert x['title'] == "sourmash: a library for MinHash sketching of DNA", x + assert x["title"] == "sourmash: a library for MinHash sketching of DNA", x def test_run_sourmash(): - status, out, err = utils.runscript('sourmash', [], fail_ok=True) - assert status != 0 # no args provided, ok ;) + status, out, err = utils.runscript("sourmash", [], fail_ok=True) + assert status != 0 # no args provided, ok ;) def test_run_sourmash_badcmd(): - status, out, err = utils.runscript('sourmash', ['foobarbaz'], fail_ok=True) - assert status != 0 # bad arg! 
+ status, out, err = utils.runscript("sourmash", ["foobarbaz"], fail_ok=True) + assert status != 0 # bad arg! assert "cmd: invalid choice" in err def test_run_sourmash_subcmd_help(): - status, out, err = utils.runscript('sourmash', ['sbt'], fail_ok=True) + status, out, err = utils.runscript("sourmash", ["sbt"], fail_ok=True) print(out) print(err) - assert status != 0 # should fail + assert status != 0 # should fail assert "invalid choice:" in err assert "'sbt' (choose from" in err @@ -73,7 +74,7 @@ def test_run_sourmash_subcmd_help(): def test_sourmash_info(): - status, out, err = utils.runscript('sourmash', ['info'], fail_ok=False) + status, out, err = utils.runscript("sourmash", ["info"], fail_ok=False) # no output to stdout assert not out @@ -83,7 +84,7 @@ def test_sourmash_info(): def test_sourmash_info_verbose(): - status, out, err = utils.runscript('sourmash', ['info', '-v']) + status, out, err = utils.runscript("sourmash", ["info", "-v"]) # no output to stdout assert not out @@ -94,6 +95,7 @@ def test_sourmash_info_verbose(): def test_load_pathlist_from_file_does_not_exist(): from sourmash.sourmash_args import load_pathlist_from_file + with pytest.raises(ValueError) as e: load_pathlist_from_file("") assert "file '' does not exist" in str(e.value) @@ -122,7 +124,7 @@ def test_load_pathlist_from_file_badly_formatted(c): @utils.in_tempdir def test_load_pathlist_from_file_badly_formatted_2(c): file_list = c.output("file_list") - sig1 = utils.get_test_data('compare/genome-s10.fa.gz.sig') + sig1 = utils.get_test_data("compare/genome-s10.fa.gz.sig") with open(file_list, "w") as fp: fp.write(sig1 + "\n") fp.write("{'a':1}") @@ -134,12 +136,12 @@ def test_load_pathlist_from_file_badly_formatted_2(c): @utils.in_tempdir def test_load_pathlist_from_file_duplicate(c): file_list = c.output("file_list") - sig1 = utils.get_test_data('compare/genome-s10.fa.gz.sig') + sig1 = utils.get_test_data("compare/genome-s10.fa.gz.sig") with open(file_list, "w") as fp: fp.write(sig1 + "\n") fp.write(sig1 + "\n") check = load_pathlist_from_file(file_list) - print (check) + print(check) assert len(check) == 1 @@ -147,19 +149,18 @@ def test_compare_serial(runtmp): # try doing a compare serially c = runtmp - testsigs = utils.get_test_data('genome-s1*.sig') + testsigs = utils.get_test_data("genome-s1*.sig") testsigs = glob.glob(testsigs) - c.run_sourmash('compare', '-o', 'cmp', '-k', '21', '--dna', *testsigs) + c.run_sourmash("compare", "-o", "cmp", "-k", "21", "--dna", *testsigs) - cmp_outfile = c.output('cmp') + cmp_outfile = c.output("cmp") assert os.path.exists(cmp_outfile) cmp_out = numpy.load(cmp_outfile) sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, - select_moltype='dna')) + sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) cmp_calc = numpy.zeros([len(sigs), len(sigs)]) for i, si in enumerate(sigs): @@ -168,8 +169,7 @@ def test_compare_serial(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, - select_moltype='dna')) + sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) assert (cmp_out == cmp_calc).all() @@ -177,20 +177,18 @@ def test_compare_serial_distance(runtmp): # try doing a compare serially, with --distance output c = runtmp - testsigs = utils.get_test_data('genome-s1*.sig') + testsigs = utils.get_test_data("genome-s1*.sig") testsigs = glob.glob(testsigs) - c.run_sourmash('compare', '-o', 'cmp', '-k', '21', '--dna', *testsigs, - '--distance') + 
c.run_sourmash("compare", "-o", "cmp", "-k", "21", "--dna", *testsigs, "--distance") - cmp_outfile = c.output('cmp') + cmp_outfile = c.output("cmp") assert os.path.exists(cmp_outfile) cmp_out = numpy.load(cmp_outfile) sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, - select_moltype='dna')) + sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) cmp_calc = numpy.zeros([len(sigs), len(sigs)]) for i, si in enumerate(sigs): @@ -199,8 +197,7 @@ def test_compare_serial_distance(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, - select_moltype='dna')) + sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) assert (cmp_out == cmp_calc).all() @@ -208,20 +205,20 @@ def test_compare_parallel(runtmp): # try doing a compare parallel c = runtmp - testsigs = utils.get_test_data('genome-s1*.sig') + testsigs = utils.get_test_data("genome-s1*.sig") testsigs = glob.glob(testsigs) - c.run_sourmash('compare', '-o', 'cmp', '-k', '21', '--dna', - "--processes", "2", *testsigs) + c.run_sourmash( + "compare", "-o", "cmp", "-k", "21", "--dna", "--processes", "2", *testsigs + ) - cmp_outfile = c.output('cmp') + cmp_outfile = c.output("cmp") assert os.path.exists(cmp_outfile) cmp_out = numpy.load(cmp_outfile) sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, - select_moltype='dna')) + sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) cmp_calc = numpy.zeros([len(sigs), len(sigs)]) for i, si in enumerate(sigs): @@ -230,32 +227,31 @@ def test_compare_parallel(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, - select_moltype='dna')) + sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) assert (cmp_out == cmp_calc).all() def test_compare_do_serial_compare_with_from_file(runtmp): # try doing a compare serial c = runtmp - testsigs = utils.get_test_data('genome-s1*.sig') + testsigs = utils.get_test_data("genome-s1*.sig") testsigs = glob.glob(testsigs) - file_list = c.output('file.list') - with open(file_list, 'wt') as fp: + file_list = c.output("file.list") + with open(file_list, "w") as fp: print("\n".join(testsigs), file=fp) - c.run_sourmash('compare', '-o', 'cmp', '-k', '21', '--dna', - '--from-file', file_list) + c.run_sourmash( + "compare", "-o", "cmp", "-k", "21", "--dna", "--from-file", file_list + ) - cmp_outfile = c.output('cmp') + cmp_outfile = c.output("cmp") assert os.path.exists(cmp_outfile) cmp_out = numpy.load(cmp_outfile) sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, - select_moltype='dna')) + sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) cmp_calc = numpy.zeros([len(sigs), len(sigs)]) for i, si in enumerate(sigs): @@ -264,8 +260,7 @@ def test_compare_do_serial_compare_with_from_file(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, - select_moltype='dna')) + sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) assert numpy.array_equal(numpy.sort(cmp_out.flat), numpy.sort(cmp_calc.flat)) @@ -274,19 +269,18 @@ def test_compare_do_basic_compare_using_rna_arg(runtmp): # try doing a basic compare using --rna instead of --dna c = runtmp - testsigs = utils.get_test_data('genome-s1*.sig') + testsigs = utils.get_test_data("genome-s1*.sig") testsigs = glob.glob(testsigs) - c.run_sourmash('compare', '-o', 'cmp', '-k', '21', 
'--rna', *testsigs) + c.run_sourmash("compare", "-o", "cmp", "-k", "21", "--rna", *testsigs) - cmp_outfile = c.output('cmp') + cmp_outfile = c.output("cmp") assert os.path.exists(cmp_outfile) cmp_out = numpy.load(cmp_outfile) sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, - select_moltype='dna')) + sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) cmp_calc = numpy.zeros([len(sigs), len(sigs)]) for i, si in enumerate(sigs): @@ -299,19 +293,18 @@ def test_compare_do_basic_compare_using_rna_arg(runtmp): def test_compare_do_basic_using_nucleotide_arg(runtmp): # try doing a basic compare using --nucleotide instead of --dna/--rna c = runtmp - testsigs = utils.get_test_data('genome-s1*.sig') + testsigs = utils.get_test_data("genome-s1*.sig") testsigs = glob.glob(testsigs) - c.run_sourmash('compare', '-o', 'cmp', '-k', '21', '--nucleotide', *testsigs) + c.run_sourmash("compare", "-o", "cmp", "-k", "21", "--nucleotide", *testsigs) - cmp_outfile = c.output('cmp') + cmp_outfile = c.output("cmp") assert os.path.exists(cmp_outfile) cmp_out = numpy.load(cmp_outfile) sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, - select_moltype='dna')) + sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) cmp_calc = numpy.zeros([len(sigs), len(sigs)]) for i, si in enumerate(sigs): @@ -324,22 +317,24 @@ def test_compare_do_basic_using_nucleotide_arg(runtmp): def test_compare_quiet(runtmp): # test 'compare -q' has no output c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - c.run_sourmash('sketch', 'translate', '-p', 'k=31,num=500', testdata1, testdata2) + c.run_sourmash("sketch", "translate", "-p", "k=31,num=500", testdata1, testdata2) - c.run_sourmash('compare', 'short.fa.sig', - 'short2.fa.sig', '--csv', 'xxx', '-q') + c.run_sourmash("compare", "short.fa.sig", "short2.fa.sig", "--csv", "xxx", "-q") assert not c.last_result.out assert not c.last_result.err def test_compare_do_traverse_directory_parse_args(runtmp): # test 'compare' on a directory, using sourmash.cli.parse_args. 
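     # parse_args builds the argparse Namespace that sourmash.commands.compare consumes below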
- import sourmash.commands, sourmash.cli - args = sourmash.cli.parse_args(['compare', '-k', '21', '--dna', - utils.get_test_data('compare')]) + import sourmash.commands + import sourmash.cli + + args = sourmash.cli.parse_args( + ["compare", "-k", "21", "--dna", utils.get_test_data("compare")] + ) sourmash.commands.compare(args) @@ -347,41 +342,39 @@ def test_compare_do_traverse_directory_parse_args(runtmp): def test_compare_do_traverse_directory(runtmp): # test 'compare' on a directory c = runtmp - c.run_sourmash('compare', '-k 21', - '--dna', utils.get_test_data('compare')) + c.run_sourmash("compare", "-k 21", "--dna", utils.get_test_data("compare")) print(c.last_result.out) - assert 'genome-s10.fa.gz' in c.last_result.out - assert 'genome-s11.fa.gz' in c.last_result.out + assert "genome-s10.fa.gz" in c.last_result.out + assert "genome-s11.fa.gz" in c.last_result.out def test_compare_do_traverse_directory_compare_force(runtmp): # test 'compare' on a directory, with -f c = runtmp - sig1 = utils.get_test_data('compare/genome-s10.fa.gz.sig') - sig2 = utils.get_test_data('compare/genome-s11.fa.gz.sig') - newdir = c.output('newdir') + sig1 = utils.get_test_data("compare/genome-s10.fa.gz.sig") + sig2 = utils.get_test_data("compare/genome-s11.fa.gz.sig") + newdir = c.output("newdir") os.mkdir(newdir) - shutil.copyfile(sig1, os.path.join(newdir, 'sig1')) - shutil.copyfile(sig2, os.path.join(newdir, 'sig2')) + shutil.copyfile(sig1, os.path.join(newdir, "sig1")) + shutil.copyfile(sig2, os.path.join(newdir, "sig2")) - c.run_sourmash('compare', '-k 21', - '--dna', newdir, '-f') + c.run_sourmash("compare", "-k 21", "--dna", newdir, "-f") print(c.last_result.out) - assert 'genome-s10.fa.gz' in c.last_result.out - assert 'genome-s11.fa.gz' in c.last_result.out + assert "genome-s10.fa.gz" in c.last_result.out + assert "genome-s11.fa.gz" in c.last_result.out def test_compare_output_csv(runtmp): # test 'sourmash compare --csv' c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - c.run_sourmash('sketch', 'dna', '-p', 'k=31,num=500', testdata1, testdata2) - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', '--csv', 'xxx') + c.run_sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) + c.run_sourmash("compare", "short.fa.sig", "short2.fa.sig", "--csv", "xxx") - with open(c.output('xxx')) as fp: + with open(c.output("xxx")) as fp: r = iter(csv.reader(fp)) row = next(r) print(row) @@ -394,21 +387,20 @@ def test_compare_output_csv(runtmp): assert float(row[1]) == 1.0 # exactly three lines - with pytest.raises(StopIteration) as e: + with pytest.raises(StopIteration): next(r) def test_compare_output_csv_gz(runtmp): # test 'sourmash compare --csv' with a .gz file c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - c.run_sourmash('sketch', 'dna', '-p', 'k=31,num=500', testdata1, testdata2) - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', - '--csv', 'xxx.gz') + c.run_sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) + c.run_sourmash("compare", "short.fa.sig", "short2.fa.sig", "--csv", "xxx.gz") - with gzip.open(c.output('xxx.gz'), 'rt', newline='') as fp: + with gzip.open(c.output("xxx.gz"), "rt", newline="") as fp: r = iter(csv.reader(fp)) row = next(r) print(row) @@ 
-421,85 +413,97 @@ def test_compare_output_csv_gz(runtmp): assert float(row[1]) == 1.0 # exactly three lines - with pytest.raises(StopIteration) as e: + with pytest.raises(StopIteration): next(r) def test_compare_downsample(runtmp): # test 'compare' with implicit downsampling c = runtmp - testdata1 = utils.get_test_data('short.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,scaled=200', testdata1) + testdata1 = utils.get_test_data("short.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=200", testdata1) - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,scaled=100', testdata2) + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=100", testdata2) - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', '--csv', 'xxx') + c.run_sourmash("compare", "short.fa.sig", "short2.fa.sig", "--csv", "xxx") print(c.last_result.status, c.last_result.out, c.last_result.err) - assert 'downsampling to scaled value of 200' in c.last_result.err - with open(c.output('xxx')) as fp: + assert "downsampling to scaled value of 200" in c.last_result.err + with open(c.output("xxx")) as fp: lines = fp.readlines() assert len(lines) == 3 - assert lines[1].startswith('1.0,0.6666') - assert lines[2].startswith('0.6666') + assert lines[1].startswith("1.0,0.6666") + assert lines[2].startswith("0.6666") def test_compare_downsample_scaled(runtmp): # test 'compare' with explicit --scaled downsampling c = runtmp - testdata1 = utils.get_test_data('short.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,scaled=200', testdata1) + testdata1 = utils.get_test_data("short.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=200", testdata1) - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,scaled=100', testdata2) + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=100", testdata2) - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', '--csv', 'xxx', - '--scaled', '300') + c.run_sourmash( + "compare", "short.fa.sig", "short2.fa.sig", "--csv", "xxx", "--scaled", "300" + ) print(c.last_result.status, c.last_result.out, c.last_result.err) - assert 'downsampling to scaled value of 300' in c.last_result.err - with open(c.output('xxx')) as fp: + assert "downsampling to scaled value of 300" in c.last_result.err + with open(c.output("xxx")) as fp: lines = fp.readlines() assert len(lines) == 3 - assert lines[1].startswith('1.0,0.0') - assert lines[2].startswith('0.0') + assert lines[1].startswith("1.0,0.0") + assert lines[2].startswith("0.0") def test_compare_downsample_scaled_too_low(runtmp): # test 'compare' with explicit --scaled downsampling, but lower than min c = runtmp - testdata1 = utils.get_test_data('short.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,scaled=200', testdata1) + testdata1 = utils.get_test_data("short.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=200", testdata1) - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,scaled=100', testdata2) + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=100", testdata2) - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', '--csv', 'xxx', - '--scaled', '100') + c.run_sourmash( + "compare", "short.fa.sig", "short2.fa.sig", "--csv", "xxx", "--scaled", "100" + ) print(c.last_result.status, c.last_result.out, c.last_result.err) - assert 'downsampling to scaled value of 200' 
in c.last_result.err - assert "WARNING: --scaled specified 100, but max scaled of sketches is 200" in c.last_result.err - with open(c.output('xxx')) as fp: + assert "downsampling to scaled value of 200" in c.last_result.err + assert ( + "WARNING: --scaled specified 100, but max scaled of sketches is 200" + in c.last_result.err + ) + with open(c.output("xxx")) as fp: lines = fp.readlines() assert len(lines) == 3 - assert lines[1].startswith('1.0,0.6666') - assert lines[2].startswith('0.6666') + assert lines[1].startswith("1.0,0.6666") + assert lines[2].startswith("0.6666") def test_compare_downsample_scaled_fail_num(runtmp): # test 'compare' with explicit --scaled downsampling; fail on num sketch c = runtmp - testdata1 = utils.get_test_data('short.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,num=20', testdata1) + testdata1 = utils.get_test_data("short.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,num=20", testdata1) - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,scaled=100', testdata2) + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=100", testdata2) - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', - '--csv', 'xxx', '--scaled', '300') + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "compare", + "short.fa.sig", + "short2.fa.sig", + "--csv", + "xxx", + "--scaled", + "300", + ) print(c.last_result.status, c.last_result.out, c.last_result.err) assert "cannot mix scaled signatures with num signatures" in c.last_result.err @@ -508,75 +512,88 @@ def test_compare_downsample_scaled_fail_num(runtmp): def test_compare_downsample_scaled_fail_all_num(runtmp): # test 'compare' with explicit --scaled downsampling; fail on all num sketches c = runtmp - testdata1 = utils.get_test_data('short.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,num=20', testdata1) + testdata1 = utils.get_test_data("short.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,num=20", testdata1) - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,num=30', testdata2) + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,num=30", testdata2) - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', - '--csv', 'xxx', '--scaled', '300') + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "compare", + "short.fa.sig", + "short2.fa.sig", + "--csv", + "xxx", + "--scaled", + "300", + ) print(c.last_result.status, c.last_result.out, c.last_result.err) - assert "ERROR: cannot specify --scaled with non-scaled signatures." in c.last_result.err + assert ( + "ERROR: cannot specify --scaled with non-scaled signatures." 
+ in c.last_result.err + ) def test_compare_output_multiple_k(runtmp): # test 'compare' when given multiple k-mer sizes -> should fail c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'translate', '-p', 'k=21,num=500', testdata1) - c.run_sourmash('sketch', 'translate', '-p', 'k=31,num=500', testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "translate", "-p", "k=21,num=500", testdata1) + c.run_sourmash("sketch", "translate", "-p", "k=31,num=500", testdata2) - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', '--csv', 'xxx', - fail_ok=True) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "compare", "short.fa.sig", "short2.fa.sig", "--csv", "xxx", fail_ok=True + ) print(c.last_result.status, c.last_result.out, c.last_result.err) assert c.last_result.status == -1 - assert 'multiple k-mer sizes loaded; please specify one' in c.last_result.err - assert '(saw k-mer sizes 21, 31)' in c.last_result.err + assert "multiple k-mer sizes loaded; please specify one" in c.last_result.err + assert "(saw k-mer sizes 21, 31)" in c.last_result.err def test_compare_output_multiple_moltype(runtmp): # 'compare' should fail when given multiple moltypes c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=21,num=500', testdata1) - c.run_sourmash('sketch', 'translate', '-p', 'k=21,num=500', testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "dna", "-p", "k=21,num=500", testdata1) + c.run_sourmash("sketch", "translate", "-p", "k=21,num=500", testdata2) - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', '--csv', 'xxx', - fail_ok=True) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "compare", "short.fa.sig", "short2.fa.sig", "--csv", "xxx", fail_ok=True + ) assert c.last_result.status == -1 print(c.last_result.err) - assert 'multiple molecule types loaded;' in c.last_result.err + assert "multiple molecule types loaded;" in c.last_result.err def test_compare_dayhoff(runtmp): # test 'compare' works with dayhoff moltype c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'translate', '-p', 'k=21,num=500', '--dayhoff', testdata1) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "translate", "-p", "k=21,num=500", "--dayhoff", testdata1) assert c.last_result.status == 0 - c.run_sourmash('sketch', 'translate', '-p', 'k=21,num=500', '--dayhoff', testdata2) + c.run_sourmash("sketch", "translate", "-p", "k=21,num=500", "--dayhoff", testdata2) assert c.last_result.status == 0 - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', - '--dayhoff', '--csv', 'xxx') - true_out = '''[1. 0.94] + c.run_sourmash( + "compare", "short.fa.sig", "short2.fa.sig", "--dayhoff", "--csv", "xxx" + ) + true_out = """[1. 0.94] [0.94 1. 
] -min similarity in matrix: 0.940'''.splitlines() +min similarity in matrix: 0.940""".splitlines() for line in c.last_result.out: - cleaned_line = line.split('...')[-1].strip() + cleaned_line = line.split("...")[-1].strip() cleaned_line in true_out assert c.last_result.status == 0 @@ -584,21 +601,20 @@ def test_compare_dayhoff(runtmp): def test_compare_hp(runtmp): # test that 'compare' works with --hp moltype c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'translate', '-p', 'k=21,num=500', '--hp', testdata1) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "translate", "-p", "k=21,num=500", "--hp", testdata1) assert c.last_result.status == 0 - c.run_sourmash('sketch', 'translate', '-p', 'k=21,num=500', '--hp', testdata2) + c.run_sourmash("sketch", "translate", "-p", "k=21,num=500", "--hp", testdata2) assert c.last_result.status == 0 - c.run_sourmash('compare', 'short.fa.sig', - 'short2.fa.sig', '--hp', '--csv', 'xxx') - true_out = '''[1. 0.94] + c.run_sourmash("compare", "short.fa.sig", "short2.fa.sig", "--hp", "--csv", "xxx") + true_out = """[1. 0.94] [0.94 1. ] -min similarity in matrix: 0.940'''.splitlines() +min similarity in matrix: 0.940""".splitlines() for line in c.last_result.out: - cleaned_line = line.split('...')[-1].strip() + cleaned_line = line.split("...")[-1].strip() cleaned_line in true_out assert c.last_result.status == 0 @@ -607,7 +623,7 @@ def _load_compare_matrix_and_sigs(compare_csv, sigfiles, *, ksize=31): # load in the output of 'compare' together with sigs # load compare CSV - with open(compare_csv, 'rt', newline="") as fp: + with open(compare_csv, newline="") as fp: r = iter(csv.reader(fp)) headers = next(r) @@ -619,7 +635,7 @@ def _load_compare_matrix_and_sigs(compare_csv, sigfiles, *, ksize=31): print(mat) # load in all the input signatures - idx_to_sig = dict() + idx_to_sig = {} for idx, filename in enumerate(sigfiles): ss = sourmash.load_one_signature(filename, ksize=ksize) idx_to_sig[idx] = ss @@ -631,15 +647,17 @@ def test_compare_containment(runtmp): # test compare --containment c = runtmp - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - c.run_sourmash('compare', '--containment', '-k', '31', - '--csv', 'output.csv', *testdata_sigs) + c.run_sourmash( + "compare", "--containment", "-k", "31", "--csv", "output.csv", *testdata_sigs + ) # load the matrix output - mat, idx_to_sig = _load_compare_matrix_and_sigs(c.output('output.csv'), - testdata_sigs) + mat, idx_to_sig = _load_compare_matrix_and_sigs( + c.output("output.csv"), testdata_sigs + ) # check explicit containment against output of compare for i in range(len(idx_to_sig)): @@ -657,15 +675,24 @@ def test_compare_containment_distance(runtmp): # test compare --containment --distance-matrix c = runtmp - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - c.run_sourmash('compare', '--containment', '--distance-matrix', '-k', '31', - '--csv', 'output.csv', *testdata_sigs) + c.run_sourmash( + "compare", + "--containment", + "--distance-matrix", + "-k", + "31", + "--csv", + "output.csv", + *testdata_sigs, + ) # load the matrix output - mat, idx_to_sig = _load_compare_matrix_and_sigs(c.output('output.csv'), - testdata_sigs) + mat, idx_to_sig = 
_load_compare_matrix_and_sigs( + c.output("output.csv"), testdata_sigs + ) # check explicit containment against output of compare for i in range(len(idx_to_sig)): @@ -683,15 +710,23 @@ def test_compare_max_containment(runtmp): # test compare --max-containment c = runtmp - testdata_glob = utils.get_test_data('scaled/*.sig') + testdata_glob = utils.get_test_data("scaled/*.sig") testdata_sigs = glob.glob(testdata_glob) - c.run_sourmash('compare', '--max-containment', '-k', '31', - '--csv', 'output.csv', *testdata_sigs) + c.run_sourmash( + "compare", + "--max-containment", + "-k", + "31", + "--csv", + "output.csv", + *testdata_sigs, + ) # load the matrix output - mat, idx_to_sig = _load_compare_matrix_and_sigs(c.output('output.csv'), - testdata_sigs) + mat, idx_to_sig = _load_compare_matrix_and_sigs( + c.output("output.csv"), testdata_sigs + ) # check explicit containment against output of compare for i in range(len(idx_to_sig)): @@ -709,15 +744,23 @@ def test_compare_avg_containment(runtmp): # test compare --avg-containment c = runtmp - testdata_glob = utils.get_test_data('scaled/*.sig') + testdata_glob = utils.get_test_data("scaled/*.sig") testdata_sigs = glob.glob(testdata_glob) - c.run_sourmash('compare', '--avg-containment', '-k', '31', - '--csv', 'output.csv', *testdata_sigs) + c.run_sourmash( + "compare", + "--avg-containment", + "-k", + "31", + "--csv", + "output.csv", + *testdata_sigs, + ) # load the matrix output - mat, idx_to_sig = _load_compare_matrix_and_sigs(c.output('output.csv'), - testdata_sigs) + mat, idx_to_sig = _load_compare_matrix_and_sigs( + c.output("output.csv"), testdata_sigs + ) # check explicit containment against output of compare for i in range(len(idx_to_sig)): @@ -735,93 +778,125 @@ def test_compare_max_containment_and_containment(runtmp): # make sure that can't specify both --max-containment and --containment c = runtmp - testdata_glob = utils.get_test_data('scaled/*.sig') + testdata_glob = utils.get_test_data("scaled/*.sig") testdata_sigs = glob.glob(testdata_glob) - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('compare', '--max-containment', '-k', '31', - '--containment', - '--csv', 'output.csv', *testdata_sigs) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "compare", + "--max-containment", + "-k", + "31", + "--containment", + "--csv", + "output.csv", + *testdata_sigs, + ) print(c.last_result.err) - assert "ERROR: cannot specify more than one containment argument!" in c.last_result.err + assert ( + "ERROR: cannot specify more than one containment argument!" in c.last_result.err + ) def test_compare_avg_containment_and_containment(runtmp): # make sure that can't specify both --avg-containment and --containment c = runtmp - testdata_glob = utils.get_test_data('scaled/*.sig') + testdata_glob = utils.get_test_data("scaled/*.sig") testdata_sigs = glob.glob(testdata_glob) - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('compare', '--avg-containment', '-k', '31', - '--containment', - '--csv', 'output.csv', *testdata_sigs) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "compare", + "--avg-containment", + "-k", + "31", + "--containment", + "--csv", + "output.csv", + *testdata_sigs, + ) print(c.last_result.err) - assert "ERROR: cannot specify more than one containment argument!" in c.last_result.err + assert ( + "ERROR: cannot specify more than one containment argument!" 
in c.last_result.err + ) def test_compare_avg_containment_and_max_containment(runtmp): # make sure that can't specify both --avg-containment and --max-containment c = runtmp - testdata_glob = utils.get_test_data('scaled/*.sig') + testdata_glob = utils.get_test_data("scaled/*.sig") testdata_sigs = glob.glob(testdata_glob) - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('compare', '--avg-containment', '-k', '31', - '--max-containment', - '--csv', 'output.csv', *testdata_sigs) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "compare", + "--avg-containment", + "-k", + "31", + "--max-containment", + "--csv", + "output.csv", + *testdata_sigs, + ) print(c.last_result.err) - assert "ERROR: cannot specify more than one containment argument!" in c.last_result.err + assert ( + "ERROR: cannot specify more than one containment argument!" in c.last_result.err + ) def test_compare_containment_abund_flatten_warning(runtmp): # check warning message about ignoring abund signatures - c = runtmp - s47 = utils.get_test_data('track_abund/47.fa.sig') - s63 = utils.get_test_data('track_abund/63.fa.sig') + c = runtmp + s47 = utils.get_test_data("track_abund/47.fa.sig") + s63 = utils.get_test_data("track_abund/63.fa.sig") - c.run_sourmash('compare', '--containment', '-k', '31', s47, s63) + c.run_sourmash("compare", "--containment", "-k", "31", s47, s63) print(c.last_result.out) print(c.last_result.err) - assert 'NOTE: --containment, --max-containment, --avg-containment, and --estimate-ani ignore signature abundances.' in \ - c.last_result.err + assert ( + "NOTE: --containment, --max-containment, --avg-containment, and --estimate-ani ignore signature abundances." + in c.last_result.err + ) def test_compare_ani_abund_flatten(runtmp): # check warning message about ignoring abund signatures c = runtmp - s47 = utils.get_test_data('track_abund/47.fa.sig') - s63 = utils.get_test_data('track_abund/63.fa.sig') + s47 = utils.get_test_data("track_abund/47.fa.sig") + s63 = utils.get_test_data("track_abund/63.fa.sig") - c.run_sourmash('compare', '--estimate-ani', '-k', '31', s47, s63) + c.run_sourmash("compare", "--estimate-ani", "-k", "31", s47, s63) print(c.last_result.out) print(c.last_result.err) - assert 'NOTE: --containment, --max-containment, --avg-containment, and --estimate-ani ignore signature abundances.' in \ - c.last_result.err + assert ( + "NOTE: --containment, --max-containment, --avg-containment, and --estimate-ani ignore signature abundances." 
+ in c.last_result.err + ) def test_compare_containment_require_scaled(runtmp): # check warning message about scaled signatures & containment c = runtmp - s47 = utils.get_test_data('num/47.fa.sig') - s63 = utils.get_test_data('num/63.fa.sig') + s47 = utils.get_test_data("num/47.fa.sig") + s63 = utils.get_test_data("num/63.fa.sig") - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('compare', '--containment', '-k', '31', s47, s63, - fail_ok=True) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("compare", "--containment", "-k", "31", s47, s63, fail_ok=True) - assert 'must use scaled signatures with --containment, --max-containment, and --avg-containment' in \ - c.last_result.err + assert ( + "must use scaled signatures with --containment, --max-containment, and --avg-containment" + in c.last_result.err + ) assert c.last_result.status != 0 @@ -829,13 +904,13 @@ def test_do_plot_comparison(runtmp): # make sure 'plot' outputs files ;) c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,num=500', testdata1, testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', '-o', 'cmp') + c.run_sourmash("compare", "short.fa.sig", "short2.fa.sig", "-o", "cmp") - c.run_sourmash('plot', 'cmp') + c.run_sourmash("plot", "cmp") assert os.path.exists(c.output("cmp.dendro.png")) assert os.path.exists(c.output("cmp.matrix.png")) @@ -845,13 +920,13 @@ def test_do_plot_comparison_2_pdf(runtmp): # test plot --pdf c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'translate', '-p', 'k=31,num=500', testdata1, testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "translate", "-p", "k=31,num=500", testdata1, testdata2) - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', '-o', 'cmp') + c.run_sourmash("compare", "short.fa.sig", "short2.fa.sig", "-o", "cmp") - c.run_sourmash('plot', 'cmp', '--pdf') + c.run_sourmash("plot", "cmp", "--pdf") assert os.path.exists(c.output("cmp.dendro.pdf")) assert os.path.exists(c.output("cmp.matrix.pdf")) @@ -860,13 +935,13 @@ def test_do_plot_comparison_3(runtmp): # test plot --labels c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'translate', '-p', 'k=31,num=500', testdata1, testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "translate", "-p", "k=31,num=500", testdata1, testdata2) - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', '-o', 'cmp') + c.run_sourmash("compare", "short.fa.sig", "short2.fa.sig", "-o", "cmp") - c.run_sourmash('plot', 'cmp', '--labels') + c.run_sourmash("plot", "cmp", "--labels") assert os.path.exists(c.output("cmp.dendro.png")) assert os.path.exists(c.output("cmp.matrix.png")) @@ -876,15 +951,15 @@ def test_do_plot_comparison_4_output_dir(runtmp): # test plot --output-dir c = runtmp - output_dir = c.output('xyz_test') + output_dir = c.output("xyz_test") - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'translate', '-p', 'k=31,num=500', testdata1, 
testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "translate", "-p", "k=31,num=500", testdata1, testdata2) - c.run_sourmash('compare', 'short.fa.sig', 'short2.fa.sig', '-o', 'cmp') + c.run_sourmash("compare", "short.fa.sig", "short2.fa.sig", "-o", "cmp") - c.run_sourmash('plot', 'cmp', '--labels', '--output-dir', output_dir) + c.run_sourmash("plot", "cmp", "--labels", "--output-dir", output_dir) assert os.path.exists(os.path.join(output_dir, "cmp.dendro.png")) assert os.path.exists(os.path.join(output_dir, "cmp.matrix.png")) @@ -896,13 +971,13 @@ def test_do_plot_comparison_5_force(runtmp): D = numpy.zeros([2, 2]) D[0, 0] = 5 - with open(c.output('cmp'), 'wb') as fp: + with open(c.output("cmp"), "wb") as fp: numpy.save(fp, D) - with open(c.output('cmp.labels.txt'), 'wt') as fp: + with open(c.output("cmp.labels.txt"), "w") as fp: fp.write("a\nb\n") - c.run_sourmash('plot', 'cmp', '--labels', '-f') + c.run_sourmash("plot", "cmp", "--labels", "-f") print(c.last_result.status, c.last_result.out, c.last_result.err) assert c.last_result.status == 0 @@ -913,14 +988,14 @@ def test_do_plot_comparison_4_fail_not_distance(runtmp): D = numpy.zeros([2, 2]) D[0, 0] = 5 - with open(c.output('cmp'), 'wb') as fp: + with open(c.output("cmp"), "wb") as fp: numpy.save(fp, D) - with open(c.output('cmp.labels.txt'), 'wt') as fp: + with open(c.output("cmp.labels.txt"), "w") as fp: fp.write("a\nb\n") - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('plot', 'cmp', '--labels', fail_ok=True) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("plot", "cmp", "--labels", fail_ok=True) print(c.last_result.status, c.last_result.out, c.last_result.err) assert c.last_result.status != 0 @@ -928,14 +1003,25 @@ def test_do_plot_comparison_4_fail_not_distance(runtmp): def test_plot_6_labels_default(runtmp): # plot --labels is default - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = utils.get_test_data('genome-s10+s11.sig') - - runtmp.run_sourmash('compare', testdata1, testdata2, testdata3, testdata4, '-o', 'cmp', '-k', '21', '--dna') - - runtmp.sourmash('plot', 'cmp', '--labels') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") + + runtmp.run_sourmash( + "compare", + testdata1, + testdata2, + testdata3, + testdata4, + "-o", + "cmp", + "-k", + "21", + "--dna", + ) + + runtmp.sourmash("plot", "cmp", "--labels") print(runtmp.last_result.out) @@ -949,14 +1035,25 @@ def test_plot_6_labels_default(runtmp): def test_plot_6_labels(runtmp): # specifying --labels gives the right result - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = utils.get_test_data('genome-s10+s11.sig') - - runtmp.run_sourmash('compare', testdata1, testdata2, testdata3, testdata4, '-o', 'cmp', '-k', '21', '--dna') - - runtmp.sourmash('plot', 'cmp', '--labels') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") + 
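+ # build a 4x4 comparison matrix across the four genome sketches, then plot it with --labels given explicitly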
+ runtmp.run_sourmash( + "compare", + testdata1, + testdata2, + testdata3, + testdata4, + "-o", + "cmp", + "-k", + "21", + "--dna", + ) + + runtmp.sourmash("plot", "cmp", "--labels") print(runtmp.last_result.out) @@ -970,14 +1067,25 @@ def test_plot_6_labels(runtmp): def test_plot_6_indices(runtmp): # test plot --indices - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = utils.get_test_data('genome-s10+s11.sig') - - runtmp.run_sourmash('compare', testdata1, testdata2, testdata3, testdata4, '-o', 'cmp', '-k', '21', '--dna') - - runtmp.sourmash('plot', 'cmp', '--indices') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") + + runtmp.run_sourmash( + "compare", + testdata1, + testdata2, + testdata3, + testdata4, + "-o", + "cmp", + "-k", + "21", + "--dna", + ) + + runtmp.sourmash("plot", "cmp", "--indices") print(runtmp.last_result.out) @@ -991,14 +1099,25 @@ def test_plot_6_indices(runtmp): def test_plot_6_no_labels(runtmp): # test plot --no-labels - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = utils.get_test_data('genome-s10+s11.sig') - - runtmp.run_sourmash('compare', testdata1, testdata2, testdata3, testdata4, '-o', 'cmp', '-k', '21', '--dna') - - runtmp.sourmash('plot', 'cmp', '--no-labels') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") + + runtmp.run_sourmash( + "compare", + testdata1, + testdata2, + testdata3, + testdata4, + "-o", + "cmp", + "-k", + "21", + "--dna", + ) + + runtmp.sourmash("plot", "cmp", "--no-labels") print(runtmp.last_result.out) @@ -1012,14 +1131,25 @@ def test_plot_6_no_labels(runtmp): def test_plot_6_no_indices(runtmp): # test plot --no-indices - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = utils.get_test_data('genome-s10+s11.sig') - - runtmp.run_sourmash('compare', testdata1, testdata2, testdata3, testdata4, '-o', 'cmp', '-k', '21', 
'--dna') - - runtmp.sourmash('plot', 'cmp', '--no-labels') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") + + runtmp.run_sourmash( + "compare", + testdata1, + testdata2, + testdata3, + testdata4, + "-o", + "cmp", + "-k", + "21", + "--dna", + ) + + runtmp.sourmash("plot", "cmp", "--no-indices") print(runtmp.last_result.out) @@ -1033,14 +1163,25 @@ def test_plot_6_no_indices(runtmp): def test_plot_6_no_labels_no_indices(runtmp): # test plot --no-labels --no-indices - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = utils.get_test_data('genome-s10+s11.sig') - - runtmp.run_sourmash('compare', testdata1, testdata2, testdata3, testdata4, '-o', 'cmp', '-k', '21', '--dna') - - runtmp.sourmash('plot', 'cmp', '--no-labels', '--no-indices') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") + + runtmp.run_sourmash( + "compare", + testdata1, + testdata2, + testdata3, + testdata4, + "-o", + "cmp", + "-k", + "21", + "--dna", + ) + + runtmp.sourmash("plot", "cmp", "--no-labels", "--no-indices") print((runtmp.last_result.out,)) @@ -1054,14 +1195,25 @@ def test_plot_6_no_labels_no_indices(runtmp): def test_plot_6_indices_labels(runtmp): # check that --labels --indices => --labels - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = utils.get_test_data('genome-s10+s11.sig') - - runtmp.run_sourmash('compare', testdata1, testdata2, testdata3, testdata4, '-o', 'cmp', '-k', '21', '--dna') - - runtmp.sourmash('plot', 'cmp', '--labels', '--indices') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") + + runtmp.run_sourmash( + "compare", + testdata1, + testdata2, + testdata3, + testdata4, + "-o", + "cmp", + "-k", + "21", + "--dna", + ) + + runtmp.sourmash("plot", "cmp", "--labels", "--indices") print(runtmp.last_result.out) @@ -1075,21 +1227,32 @@ def test_plot_6_indices_labels(runtmp): def test_plot_override_labeltext(runtmp): # test overriding labeltext - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = utils.get_test_data('genome-s10+s11.sig') - - runtmp.run_sourmash('compare', testdata1, testdata2, testdata3, testdata4, '-o', 'cmp', '-k', '21', '--dna') - - with open(runtmp.output('new.labels.txt'), 'wt') as fp: - fp.write('a\nb\nc\nd\n') - - runtmp.sourmash('plot', 'cmp', '--labeltext', 'new.labels.txt') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") + + runtmp.run_sourmash( + "compare", + testdata1, + testdata2, + testdata3, + testdata4, + "-o", + "cmp", + "-k", + "21", + "--dna", + ) + + with open(runtmp.output("new.labels.txt"), "w") as fp: + fp.write("a\nb\nc\nd\n") + + runtmp.sourmash("plot", "cmp", "--labeltext", "new.labels.txt") print(runtmp.last_result.out) - assert 'loading labels from new.labels.txt' in runtmp.last_result.err + assert "loading labels from new.labels.txt" in runtmp.last_result.err expected = """\ 0\ta @@ -1101,46 +1264,59 @@ def test_plot_override_labeltext(runtmp): def test_plot_override_labeltext_fail(runtmp): # test failed override of labeltext - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = 
utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") + + runtmp.sourmash( + "compare", + testdata1, + testdata2, + testdata3, + testdata4, + "-o", + "cmp", + "-k", + "21", + "--dna", + ) + + with open(runtmp.output("new.labels.txt"), "w") as fp: + fp.write("a\nb\nc\n") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('plot', 'cmp', '--labeltext', 'new.labels.txt') + runtmp.sourmash("plot", "cmp", "--labeltext", "new.labels.txt") print(runtmp.last_result.out) print(runtmp.last_result.err) assert runtmp.last_result.status != 0 - assert 'loading labels from new.labels.txt' in runtmp.last_result.err - assert '3 labels != matrix size, exiting' in runtmp.last_result.err + assert "loading labels from new.labels.txt" in runtmp.last_result.err + assert "3 labels != matrix size, exiting" in runtmp.last_result.err def test_plot_reordered_labels_csv(runtmp): # test 'plot --csv' & correct ordering of labels c = runtmp - ss2 = utils.get_test_data('2.fa.sig') - ss47 = utils.get_test_data('47.fa.sig') - ss63 = utils.get_test_data('63.fa.sig') + ss2 = utils.get_test_data("2.fa.sig") + ss47 = utils.get_test_data("47.fa.sig") + ss63 = utils.get_test_data("63.fa.sig") - c.run_sourmash('compare', '-k', '31', '-o', 'cmp', ss2, ss47, ss63) - c.run_sourmash('plot', 'cmp', '--csv', 'neworder.csv') + c.run_sourmash("compare", "-k", "31", "-o", "cmp", ss2, ss47, ss63) + c.run_sourmash("plot", "cmp", "--csv", "neworder.csv") - with open(c.output('neworder.csv'), newline="") as fp: + with open(c.output("neworder.csv"), newline="") as fp: r = csv.DictReader(fp) akker_vals = set() for row in r: - akker_vals.add(row['CP001071.1 Akkermansia muciniphila ATCC BAA-835, complete genome']) + akker_vals.add( + row["CP001071.1 Akkermansia muciniphila ATCC BAA-835, complete genome"] + ) - assert '1.0' in akker_vals - assert '0.0' in akker_vals + assert "1.0" in akker_vals + assert "0.0" in akker_vals assert len(akker_vals) == 2 @@ -1148,35 +1324,48 @@ def test_plot_reordered_labels_csv_gz(runtmp): # test 'plot --csv' with a .gz output c = runtmp - ss2 = utils.get_test_data('2.fa.sig') - ss47 = utils.get_test_data('47.fa.sig') - ss63 = utils.get_test_data('63.fa.sig') + ss2 = utils.get_test_data("2.fa.sig") + ss47 = utils.get_test_data("47.fa.sig") + ss63 = utils.get_test_data("63.fa.sig") - c.run_sourmash('compare', '-k', '31', '-o', 'cmp', ss2, ss47, ss63) - c.run_sourmash('plot', 'cmp', '--csv', 'neworder.csv.gz') + c.run_sourmash("compare", "-k", "31", "-o", "cmp", ss2, ss47, ss63) + c.run_sourmash("plot", "cmp", "--csv", "neworder.csv.gz") - with gzip.open(c.output('neworder.csv.gz'), 'rt', newline="") as fp: + with gzip.open(c.output("neworder.csv.gz"), "rt", newline="") as fp: r = csv.DictReader(fp) akker_vals = set() for row in r: - akker_vals.add(row['CP001071.1 Akkermansia muciniphila ATCC BAA-835, complete genome']) + akker_vals.add( + row["CP001071.1 Akkermansia muciniphila ATCC BAA-835, complete genome"] + ) - assert '1.0' in akker_vals - assert '0.0' in akker_vals + assert "1.0" in akker_vals + assert "0.0" in akker_vals assert len(akker_vals) == 2 def test_plot_subsample_1(runtmp): # test plotting with --subsample - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = utils.get_test_data('genome-s10+s11.sig') - - runtmp.sourmash('compare', testdata1, testdata2, 
testdata3, testdata4, '-o', 'cmp', '-k', '21', '--dna') - - runtmp.sourmash('plot', 'cmp', '--subsample', '3') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") + + runtmp.sourmash( + "compare", + testdata1, + testdata2, + testdata3, + testdata4, + "-o", + "cmp", + "-k", + "21", + "--dna", + ) + + runtmp.sourmash("plot", "cmp", "--subsample", "3") print(runtmp.last_result.out) @@ -1189,14 +1378,25 @@ def test_plot_subsample_1(runtmp): def test_plot_subsample_2(runtmp): # test plotting --subsample with --subsample-seed - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = utils.get_test_data('genome-s10+s11.sig') - - runtmp.sourmash('compare', testdata1, testdata2, testdata3, testdata4, '-o', 'cmp', '-k', '21', '--dna') - - runtmp.sourmash('plot', 'cmp', '--subsample', '3', '--subsample-seed=2') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") + + runtmp.sourmash( + "compare", + testdata1, + testdata2, + testdata3, + testdata4, + "-o", + "cmp", + "-k", + "21", + "--dna", + ) + + runtmp.sourmash("plot", "cmp", "--subsample", "3", "--subsample-seed=2") print(runtmp.last_result.out) expected = """\ @@ -1208,25 +1408,25 @@ def test_plot_subsample_2(runtmp): @utils.in_tempdir def test_search_query_sig_does_not_exist(c): - testdata1 = utils.get_test_data('short.fa') - c.run_sourmash('sketch', 'translate', '-p', 'k=31,num=500', testdata1) + testdata1 = utils.get_test_data("short.fa") + c.run_sourmash("sketch", "translate", "-p", "k=31,num=500", testdata1) - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('search', 'short2.fa.sig', 'short.fa.sig', fail_ok=True) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("search", "short2.fa.sig", "short.fa.sig", fail_ok=True) print(c.last_result.status, c.last_result.out, c.last_result.err) assert c.last_result.status == -1 assert "Cannot open query file 'short2.fa.sig'" in c.last_result.err - assert len(c.last_result.err.split('\n\r')) < 5 + assert len(c.last_result.err.split("\n\r")) < 5 @utils.in_tempdir def test_search_subject_sig_does_not_exist(c): - testdata1 = utils.get_test_data('short.fa') - c.run_sourmash('sketch', 'translate', '-p', 'k=31,num=500', testdata1) + testdata1 = utils.get_test_data("short.fa") + c.run_sourmash("sketch", "translate", "-p", "k=31,num=500", testdata1) - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('search', 'short.fa.sig', 'short2.fa.sig', fail_ok=True) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("search", "short.fa.sig", "short2.fa.sig", fail_ok=True) print(c.last_result.status, c.last_result.out, c.last_result.err) assert c.last_result.status == -1 @@ -1235,12 +1435,13 @@ def test_search_subject_sig_does_not_exist(c): @utils.in_tempdir def test_search_second_subject_sig_does_not_exist(c): - testdata1 = utils.get_test_data('short.fa') - c.run_sourmash('sketch', 'translate', '-p', 'k=31,num=500', testdata1) + testdata1 = utils.get_test_data("short.fa") + c.run_sourmash("sketch", "translate", "-p", "k=31,num=500", testdata1) - with pytest.raises(SourmashCommandFailed) 
as exc: - c.run_sourmash('search', 'short.fa.sig', 'short.fa.sig', - 'short2.fa.sig', fail_ok=True) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "search", "short.fa.sig", "short.fa.sig", "short2.fa.sig", fail_ok=True + ) print(c.last_result.status, c.last_result.out, c.last_result.err) assert c.last_result.status == -1 @@ -1249,35 +1450,35 @@ def test_search_second_subject_sig_does_not_exist(c): @utils.in_tempdir def test_search(c): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,num=500', testdata1, testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - c.run_sourmash('search', 'short.fa.sig', 'short2.fa.sig') + c.run_sourmash("search", "short.fa.sig", "short2.fa.sig") print(c.last_result.status, c.last_result.out, c.last_result.err) - assert '1 matches' in c.last_result.out - assert '93.0%' in c.last_result.out + assert "1 matches" in c.last_result.out + assert "93.0%" in c.last_result.out def test_search_ignore_abundance(runtmp): # note: uses num signatures. - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - runtmp.sourmash('sketch', 'dna', '-p','k=31,num=500,abund', testdata1, testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500,abund", testdata1, testdata2) # Make sure there's different percent matches when using or # not using abundance - runtmp.sourmash('search', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("search", "short.fa.sig", "short2.fa.sig") out1 = runtmp.last_result.out print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '1 matches' in runtmp.last_result.out - assert '81.5%' in runtmp.last_result.out + assert "1 matches" in runtmp.last_result.out + assert "81.5%" in runtmp.last_result.out - runtmp.sourmash('search', '--ignore-abundance', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("search", "--ignore-abundance", "short.fa.sig", "short2.fa.sig") out2 = runtmp.last_result.out print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '1 matches' in runtmp.last_result.out - assert '93.0%' in runtmp.last_result.out + assert "1 matches" in runtmp.last_result.out + assert "93.0%" in runtmp.last_result.out # Make sure results are different! 
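+ # (the abundance-weighted search above reports 81.5%, vs. the flat 93.0% Jaccard similarity with --ignore-abundance, so the two outputs cannot match)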
assert out1 != out2 @@ -1285,102 +1486,104 @@ def test_search_ignore_abundance(runtmp): def test_search_abund_subj_flat(runtmp): # test Index.search_abund requires an abund subj - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('search', sig47, sig63) + runtmp.sourmash("search", sig47, sig63) - assert "'search_abund' requires subject signatures with abundance information" in str(exc.value) + assert ( + "'search_abund' requires subject signatures with abundance information" + in str(exc.value) + ) def test_search_abund_csv(runtmp): # test search with abundance signatures, look at CSV output - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - runtmp.sourmash('sketch', 'dna', '-p','k=31,scaled=1,abund', testdata1, testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + runtmp.sourmash("sketch", "dna", "-p", "k=31,scaled=1,abund", testdata1, testdata2) - runtmp.sourmash('search', 'short.fa.sig', 'short2.fa.sig', '-o', 'xxx.csv') - out1 = runtmp.last_result.out + runtmp.sourmash("search", "short.fa.sig", "short2.fa.sig", "-o", "xxx.csv") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '1 matches' in runtmp.last_result.out - assert '82.7%' in runtmp.last_result.out + assert "1 matches" in runtmp.last_result.out + assert "82.7%" in runtmp.last_result.out - with open(runtmp.output('xxx.csv'), newline="") as fp: + with open(runtmp.output("xxx.csv"), newline="") as fp: r = csv.DictReader(fp) row = next(r) print(row) - assert float(row['similarity']) == 0.8266277454288367 - assert row['md5'] == 'bf752903d635b1eb83c53fe4aae951db' - assert row['filename'].endswith('short2.fa.sig') - assert row['md5'] == 'bf752903d635b1eb83c53fe4aae951db' - assert row['query_filename'].endswith('short.fa') - assert row['query_name'] == '' - assert row['query_md5'] == '9191284a' - assert row['filename'] == 'short2.fa.sig', row['filename'] + assert float(row["similarity"]) == 0.8266277454288367 + assert row["md5"] == "bf752903d635b1eb83c53fe4aae951db" + assert row["filename"].endswith("short2.fa.sig") + assert row["md5"] == "bf752903d635b1eb83c53fe4aae951db" + assert row["query_filename"].endswith("short.fa") + assert row["query_name"] == "" + assert row["query_md5"] == "9191284a" + assert row["filename"] == "short2.fa.sig", row["filename"] @utils.in_tempdir def test_search_csv(c): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,num=500', testdata1, testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - c.run_sourmash('search', 'short.fa.sig', 'short2.fa.sig', '-o', 'xxx.csv') + c.run_sourmash("search", "short.fa.sig", "short2.fa.sig", "-o", "xxx.csv") print(c.last_result.status, c.last_result.out, c.last_result.err) - csv_file = c.output('xxx.csv') + csv_file = c.output("xxx.csv") with open(csv_file) as fp: reader = csv.DictReader(fp) row = next(reader) print(row) - assert float(row['similarity']) == 0.93 - assert row['filename'].endswith('short2.fa.sig') - assert row['md5'] == '914591cd1130aa915fe0c0c63db8f19d' - assert row['query_filename'].endswith('short.fa') 
- assert row['query_name'] == '' - assert row['query_md5'] == 'e26a306d' + assert float(row["similarity"]) == 0.93 + assert row["filename"].endswith("short2.fa.sig") + assert row["md5"] == "914591cd1130aa915fe0c0c63db8f19d" + assert row["query_filename"].endswith("short.fa") + assert row["query_name"] == "" + assert row["query_md5"] == "e26a306d" @utils.in_tempdir def test_search_lca_db(c): # can we do a 'sourmash search' on an LCA database? - query = utils.get_test_data('47.fa.sig') - lca_db = utils.get_test_data('lca/47+63.lca.json') + query = utils.get_test_data("47.fa.sig") + lca_db = utils.get_test_data("lca/47+63.lca.json") - c.run_sourmash('search', query, lca_db) + c.run_sourmash("search", query, lca_db) print(c) - assert 'NC_009665.1 Shewanella baltica OS185, complete genome' in str(c) + assert "NC_009665.1 Shewanella baltica OS185, complete genome" in str(c) def test_search_query_db_md5(runtmp): # pull a search query out of a database with an md5sum - db = utils.get_test_data('prot/protein.sbt.zip') - runtmp.run_sourmash('search', db, db, '--md5', '16869d2c8a1') + db = utils.get_test_data("prot/protein.sbt.zip") + runtmp.run_sourmash("search", db, db, "--md5", "16869d2c8a1") - assert '100.0% GCA_001593925' in str(runtmp) + assert "100.0% GCA_001593925" in str(runtmp) def test_gather_query_db_md5(runtmp, linear_gather, prefetch_gather): # pull a search query out of a database with an md5sum - db = utils.get_test_data('prot/protein.sbt.zip') - runtmp.run_sourmash('gather', db, db, '--md5', '16869d2c8a1', - linear_gather, prefetch_gather) + db = utils.get_test_data("prot/protein.sbt.zip") + runtmp.run_sourmash( + "gather", db, db, "--md5", "16869d2c8a1", linear_gather, prefetch_gather + ) - assert '340.9 kbp 100.0% 100.0% GCA_001593925' in str(runtmp) + assert "340.9 kbp 100.0% 100.0% GCA_001593925" in str(runtmp) def test_gather_query_db_md5_ambiguous(runtmp, linear_gather, prefetch_gather): c = runtmp # what if we give an ambiguous md5 prefix? - db = utils.get_test_data('prot/protein.sbt.zip') + db = utils.get_test_data("prot/protein.sbt.zip") - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('gather', db, db, '--md5', '1', linear_gather, - prefetch_gather) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("gather", db, db, "--md5", "1", linear_gather, prefetch_gather) err = c.last_result.err assert "Error! Multiple signatures start with md5 '1'" in err @@ -1388,38 +1591,46 @@ def test_gather_query_db_md5_ambiguous(runtmp, linear_gather, prefetch_gather): def test_gather_lca_db(runtmp, linear_gather, prefetch_gather): # can we do a 'sourmash gather' on an LCA database? 
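+ # note: the LCA database is saved at scaled=10000, so gather must downsample the scaled=1000 query and warn about it (both checked below)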
- query = utils.get_test_data('47+63.fa.sig') - lca_db = utils.get_test_data('lca/47+63.lca.json') + query = utils.get_test_data("47+63.fa.sig") + lca_db = utils.get_test_data("lca/47+63.lca.json") - runtmp.sourmash('gather', query, lca_db, linear_gather, prefetch_gather) + runtmp.sourmash("gather", query, lca_db, linear_gather, prefetch_gather) print(runtmp) out = runtmp.last_result.out - assert 'NC_009665.1 Shewanella baltica OS185' in out - assert 'WARNING: final scaled was 10000, vs query scaled of 1000' in out + assert "NC_009665.1 Shewanella baltica OS185" in out + assert "WARNING: final scaled was 10000, vs query scaled of 1000" in out def test_gather_csv_output_filename_bug(runtmp, linear_gather, prefetch_gather): c = runtmp # check a bug where the database filename in the output CSV was incorrect - query = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - lca_db_1 = utils.get_test_data('lca/delmont-1.lca.json') - lca_db_2 = utils.get_test_data('lca/delmont-2.lca.json') - - c.run_sourmash('gather', query, lca_db_1, lca_db_2, '-o', 'out.csv', - linear_gather, prefetch_gather) - with open(c.output('out.csv'), 'rt') as fp: + query = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + lca_db_1 = utils.get_test_data("lca/delmont-1.lca.json") + lca_db_2 = utils.get_test_data("lca/delmont-2.lca.json") + + c.run_sourmash( + "gather", + query, + lca_db_1, + lca_db_2, + "-o", + "out.csv", + linear_gather, + prefetch_gather, + ) + with open(c.output("out.csv")) as fp: r = csv.DictReader(fp) row = next(r) - assert row['filename'] == lca_db_1 + assert row["filename"] == lca_db_1 def test_compare_no_such_file(runtmp): # 'compare' fails on nonexistent files c = runtmp - with pytest.raises(SourmashCommandFailed) as e: - c.run_sourmash('compare', 'nosuchfile.sig') + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("compare", "nosuchfile.sig") assert "Error while reading signatures from 'nosuchfile.sig'." in c.last_result.err @@ -1427,8 +1638,8 @@ def test_compare_no_such_file_force(runtmp): # can still run compare on nonexistent with -f c = runtmp - with pytest.raises(SourmashCommandFailed) as e: - c.run_sourmash('compare', 'nosuchfile.sig', '-f') + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("compare", "nosuchfile.sig", "-f") print(c.last_result.err) - assert "Error while reading signatures from 'nosuchfile.sig'." + assert "Error while reading signatures from 'nosuchfile.sig'." in c.last_result.err @@ -1437,191 +1648,197 @@ def test_compare_no_matching_sigs(runtmp): # compare fails when no sketches found with desired ksize c = runtmp - query = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') + query = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") - with pytest.raises(SourmashCommandFailed) as exc: - c.last_result.status, c.last_result.out, c.last_result.err = \ - c.run_sourmash('compare', '-k', '100', query, fail_ok=True) + with pytest.raises(SourmashCommandFailed): + c.last_result.status, c.last_result.out, c.last_result.err = c.run_sourmash( + "compare", "-k", "100", query, fail_ok=True ) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status - assert 'warning: no signatures loaded at given ksize/molecule type' in c.last_result.err - assert 'no signatures found! exiting.' in c.last_result.err + assert ( + "warning: no signatures loaded at given ksize/molecule type" + in c.last_result.err + ) + assert "no signatures found! exiting." 
in c.last_result.err def test_compare_deduce_molecule(runtmp): # deduce DNA vs protein from query, if it is unique - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'translate', '-p', 'k=10,num=500', testdata1,testdata2) + runtmp.sourmash("sketch", "translate", "-p", "k=10,num=500", testdata1, testdata2) - runtmp.sourmash('compare', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("compare", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert 'min similarity in matrix: 0.91' in runtmp.last_result.out + assert "min similarity in matrix: 0.91" in runtmp.last_result.out def test_compare_choose_molecule_dna(runtmp): # choose molecule type - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('compute', '-k', '30', '--dna', '--protein', testdata1, testdata2) + runtmp.sourmash("compute", "-k", "30", "--dna", "--protein", testdata1, testdata2) - runtmp.sourmash('compare', '--dna', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("compare", "--dna", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert 'min similarity in matrix: 0.938' in runtmp.last_result.out + assert "min similarity in matrix: 0.938" in runtmp.last_result.out def test_compare_choose_molecule_protein(runtmp): # choose molecule type - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('compute', '-k', '30', '--dna', '--protein', testdata1, testdata2) + runtmp.sourmash("compute", "-k", "30", "--dna", "--protein", testdata1, testdata2) - runtmp.sourmash('compare', '--protein', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("compare", "--protein", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert 'min similarity in matrix: 0.91' in runtmp.last_result.out + assert "min similarity in matrix: 0.91" in runtmp.last_result.out def test_compare_no_choose_molecule_fail(runtmp): # choose molecule type - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'k=30,num=500',testdata1) + runtmp.sourmash("sketch", "dna", "-p", "k=30,num=500", testdata1) - runtmp.sourmash('sketch', 'protein', '-p', 'k=30,num=500', testdata2) + runtmp.sourmash("sketch", "protein", "-p", "k=30,num=500", testdata2) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('compare', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("compare", "short.fa.sig", "short2.fa.sig") - assert 'multiple molecule types loaded; please specify' in runtmp.last_result.err + assert "multiple molecule types loaded; please specify" in runtmp.last_result.err assert runtmp.last_result.status != 0 def test_compare_deduce_ksize(runtmp): # deduce ksize, if it is unique - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = 
utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'k=29,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=29,num=500", testdata1, testdata2) - runtmp.sourmash('compare', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("compare", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert 'min similarity in matrix: 0.938' in runtmp.last_result.out + assert "min similarity in matrix: 0.938" in runtmp.last_result.out def test_search_deduce_molecule(runtmp): # deduce DNA vs protein from query, if it is unique - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'translate', '-p', 'k=10,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "translate", "-p", "k=10,num=500", testdata1, testdata2) - runtmp.sourmash('search', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("search", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '1 matches' in runtmp.last_result.out - assert '(k=10, protein)' in runtmp.last_result.err + assert "1 matches" in runtmp.last_result.out + assert "(k=10, protein)" in runtmp.last_result.err def test_search_deduce_ksize(runtmp): # deduce ksize from query, if it is unique - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'translate', '-p', 'k=23,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "translate", "-p", "k=23,num=500", testdata1, testdata2) - runtmp.sourmash('search', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("search", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '1 matches' in runtmp.last_result.out - assert 'k=23' in runtmp.last_result.err + assert "1 matches" in runtmp.last_result.out + assert "k=23" in runtmp.last_result.err def test_do_sourmash_index_multik_fail(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'translate', '-p', 'k=31,num=500', testdata1) + runtmp.sourmash("sketch", "translate", "-p", "k=31,num=500", testdata1) - runtmp.sourmash('sketch', 'translate', '-p', 'k=32,num=500', testdata2) + runtmp.sourmash("sketch", "translate", "-p", "k=32,num=500", testdata2) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "zzz", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) assert runtmp.last_result.status == -1 def test_do_sourmash_index_multimol_fail(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'translate', testdata1) + runtmp.sourmash("sketch", "translate", testdata1) - runtmp.sourmash('sketch', 'translate', '-p', 'k=30,num=500', testdata2) + runtmp.sourmash("sketch", "translate", "-p", "k=30,num=500", testdata2) with 
pytest.raises(SourmashCommandFailed): - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "zzz", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) assert runtmp.last_result.status == -1 def test_do_sourmash_index_multinum_fail(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'translate', '-p', 'k=31,num=500', testdata1) + runtmp.sourmash("sketch", "translate", "-p", "k=31,num=500", testdata1) - runtmp.sourmash('sketch', 'translate', '-p', 'k=31,num=1000', testdata2) + runtmp.sourmash("sketch", "translate", "-p", "k=31,num=1000", testdata2) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "zzz", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) assert runtmp.last_result.status == -1 - assert 'trying to build an SBT with incompatible signatures.' in runtmp.last_result.err + assert ( + "trying to build an SBT with incompatible signatures." in runtmp.last_result.err + ) def test_do_sourmash_index_multiscaled_fail(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'scaled=10', testdata1) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", testdata1) - runtmp.sourmash('sketch', 'dna', '-p', 'scaled=1', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=1", testdata2) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "-k", "31", "zzz", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) assert runtmp.last_result.status == -1 - assert 'trying to build an SBT with incompatible signatures.' in runtmp.last_result.err + assert ( + "trying to build an SBT with incompatible signatures." 
in runtmp.last_result.err + ) @utils.in_tempdir def test_do_sourmash_index_multiscaled_rescale(c): # test sourmash index --scaled - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - c.run_sourmash('sketch', 'dna', '-p', 'scaled=10', testdata1) - c.run_sourmash('sketch', 'dna', '-p', 'scaled=1', testdata2) + c.run_sourmash("sketch", "dna", "-p", "scaled=10", testdata1) + c.run_sourmash("sketch", "dna", "-p", "scaled=1", testdata2) - c.run_sourmash('index', 'zzz', - 'short.fa.sig', - 'short2.fa.sig', - '-k', '31', - '--scaled', '10') + c.run_sourmash( + "index", "zzz", "short.fa.sig", "short2.fa.sig", "-k", "31", "--scaled", "10" + ) print(c) assert c.last_result.status == 0 @@ -1630,190 +1847,202 @@ def test_do_sourmash_index_multiscaled_rescale(c): @utils.in_tempdir def test_do_sourmash_index_multiscaled_rescale_fail(c): # test sourmash index --scaled with invalid rescaling (10 -> 5) - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - c.run_sourmash('sketch', 'dna', '-p', 'scaled=10', testdata1) - c.run_sourmash('sketch', 'dna', '-p', 'scaled=1', testdata2) + c.run_sourmash("sketch", "dna", "-p", "scaled=10", testdata1) + c.run_sourmash("sketch", "dna", "-p", "scaled=1", testdata2) # this should fail: cannot go from a scaled value of 10 to 5 with pytest.raises(SourmashCommandFailed) as e: - c.run_sourmash('index', 'zzz', - 'short.fa.sig', - 'short2.fa.sig', - '-k', '31', - '--scaled', '5') + c.run_sourmash( + "index", "zzz", "short.fa.sig", "short2.fa.sig", "-k", "31", "--scaled", "5" + ) print(e.value) assert c.last_result.status == -1 - assert 'new scaled 5 is lower than current sample scaled 10' in c.last_result.err + assert "new scaled 5 is lower than current sample scaled 10" in c.last_result.err def test_do_sourmash_sbt_search_output(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'k=31,num=500', testdata1,testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz", "short.fa.sig", "short2.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('search', 'short.fa.sig', 'zzz', '-o', 'foo') + runtmp.sourmash("search", "short.fa.sig", "zzz", "-o", "foo") - output = Path(runtmp.output('foo')).read_text() + output = Path(runtmp.output("foo")).read_text() print(output) - assert 'e26a306d26512' in output - assert '914591cd1130aa915' in output + assert "e26a306d26512" in output + assert "914591cd1130aa915" in output # check against a bug in sbt search triggered by incorrect max Jaccard # calculation. 
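+ # (the assert on min_n_below below guards the fix: node 0, presumably the root, must record the smaller leaf's hash count of 431, since an incorrect value skews the max-Jaccard estimate used during search)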
def test_do_sourmash_sbt_search_check_bug(runtmp): # mins: 431 - testdata1 = utils.get_test_data('sbt-search-bug/nano.sig') + testdata1 = utils.get_test_data("sbt-search-bug/nano.sig") # mins: 6264 - testdata2 = utils.get_test_data('sbt-search-bug/bacteroides.sig') + testdata2 = utils.get_test_data("sbt-search-bug/bacteroides.sig") - runtmp.sourmash('index', 'zzz', testdata1, testdata2, '-k', '31') + runtmp.sourmash("index", "zzz", testdata1, testdata2, "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('search', testdata1, 'zzz') + runtmp.sourmash("search", testdata1, "zzz") - assert '1 matches' in runtmp.last_result.out + assert "1 matches" in runtmp.last_result.out - tree = load_sbt_index(runtmp.output('zzz.sbt.zip')) - assert tree._nodes[0].metadata['min_n_below'] == 431 + tree = load_sbt_index(runtmp.output("zzz.sbt.zip")) + assert tree._nodes[0].metadata["min_n_below"] == 431 def test_do_sourmash_sbt_search_empty_sig(runtmp): # mins: 431 - testdata1 = utils.get_test_data('sbt-search-bug/nano.sig') + testdata1 = utils.get_test_data("sbt-search-bug/nano.sig") # mins: 0 - testdata2 = utils.get_test_data('sbt-search-bug/empty.sig') + testdata2 = utils.get_test_data("sbt-search-bug/empty.sig") - runtmp.sourmash('index', 'zzz', testdata1, testdata2, '-k', '31') + runtmp.sourmash("index", "zzz", testdata1, testdata2, "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('search', testdata1, 'zzz') + runtmp.sourmash("search", testdata1, "zzz") - assert '1 matches' in runtmp.last_result.out + assert "1 matches" in runtmp.last_result.out - tree = load_sbt_index(runtmp.output('zzz.sbt.zip')) - assert tree._nodes[0].metadata['min_n_below'] == 1 + tree = load_sbt_index(runtmp.output("zzz.sbt.zip")) + assert tree._nodes[0].metadata["min_n_below"] == 1 def test_do_sourmash_sbt_move_and_search_output(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'k=31,num=500', testdata1,testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - runtmp.sourmash('index', 'zzz.sbt.json', 'short.fa.sig', 'short2.fa.sig', '-k', '31') + runtmp.sourmash( + "index", "zzz.sbt.json", "short.fa.sig", "short2.fa.sig", "-k", "31" + ) - assert os.path.exists(runtmp.output('zzz.sbt.json')) + assert os.path.exists(runtmp.output("zzz.sbt.json")) print(runtmp.last_result.out) - with open(runtmp.output('zzz.sbt.json')) as fp: + with open(runtmp.output("zzz.sbt.json")) as fp: d = json.load(fp) - assert d['storage']['args']['path'] == '.sbt.zzz' + assert d["storage"]["args"]["path"] == ".sbt.zzz" - newpath = runtmp.output('subdir') + newpath = runtmp.output("subdir") os.mkdir(newpath) # move both JSON file and subdirectory. 
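+ # (zzz.sbt.json references its storage by the relative path .sbt.zzz, per the d["storage"]["args"]["path"] check above, so the index only keeps working if the two move together)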
- shutil.move(runtmp.output('zzz.sbt.json'), newpath) - shutil.move(runtmp.output('.sbt.zzz'), newpath) + shutil.move(runtmp.output("zzz.sbt.json"), newpath) + shutil.move(runtmp.output(".sbt.zzz"), newpath) - status, out, err = utils.runscript('sourmash', - ['search', '../short.fa.sig', - 'zzz.sbt.json', '-o', 'foo'], - in_directory=newpath) + status, out, err = utils.runscript( + "sourmash", + ["search", "../short.fa.sig", "zzz.sbt.json", "-o", "foo"], + in_directory=newpath, + ) - output = Path(os.path.join(newpath, 'foo')).read_text() + output = Path(os.path.join(newpath, "foo")).read_text() print(output) - assert '914591cd1130aa91' in output - assert 'e26a306d2651' in output + assert "914591cd1130aa91" in output + assert "e26a306d2651" in output def test_search_deduce_ksize_and_select_appropriate(runtmp): # deduce ksize from query and select correct signature from DB - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'translate', '-p', 'k=24,num=500', testdata1) + runtmp.sourmash("sketch", "translate", "-p", "k=24,num=500", testdata1) # The DB contains signatures for multiple ksizes - runtmp.sourmash('sketch', 'translate', '-p', 'k=23,num=500', '-p', 'k=24,num=500', testdata2) + runtmp.sourmash( + "sketch", "translate", "-p", "k=23,num=500", "-p", "k=24,num=500", testdata2 + ) - runtmp.sourmash('search', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("search", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '1 matches' in runtmp.last_result.out - assert 'k=24' in runtmp.last_result.err + assert "1 matches" in runtmp.last_result.out + assert "k=24" in runtmp.last_result.err def test_search_deduce_ksize_not_unique(runtmp): # deduce ksize from query, fail because it is not unique - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - - runtmp.sourmash('sketch', 'translate', '-p', 'k=23,num=500', '-p', 'k=25,num=500', testdata1, testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + + runtmp.sourmash( + "sketch", + "translate", + "-p", + "k=23,num=500", + "-p", + "k=25,num=500", + testdata1, + testdata2, + ) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("search", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) assert runtmp.last_result.status == -1 - assert '2 signatures matching ksize' in runtmp.last_result.err + assert "2 signatures matching ksize" in runtmp.last_result.err @utils.in_tempdir def test_search_deduce_ksize_no_match(c): # no matching sigs in search sig list - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - c.run_sourmash('sketch', 'translate', '-p', 'k=23,num=500', testdata1) - c.run_sourmash('sketch', 'translate', '-p', 'k=25,num=500', testdata2) + c.run_sourmash("sketch", "translate", "-p", "k=23,num=500", testdata1) + c.run_sourmash("sketch", "translate", "-p", "k=25,num=500", testdata2) with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('search', 'short.fa.sig', 'short2.fa.sig') + c.run_sourmash("search", "short.fa.sig", "short2.fa.sig") assert "no 
compatible signatures found in 'short2.fa.sig'" in str(exc.value) def test_search_deduce_ksize_vs_user_specified(runtmp): # user specified ksize is not available - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'translate', '-p', 'k=23,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "translate", "-p", "k=23,num=500", testdata1, testdata2) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', '-k', '24', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("search", "-k", "24", "short.fa.sig", "short2.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) assert runtmp.last_result.status == -1 - assert '0 signatures matching ksize' in runtmp.last_result.err + assert "0 signatures matching ksize" in runtmp.last_result.err def test_search_containment(runtmp): # search with --containment in signatures - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'scaled=1', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=1", testdata1, testdata2) - runtmp.sourmash('search', 'short.fa.sig', 'short2.fa.sig', '--containment') + runtmp.sourmash("search", "short.fa.sig", "short2.fa.sig", "--containment") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '1 matches' in runtmp.last_result.out - assert '95.6%' in runtmp.last_result.out + assert "1 matches" in runtmp.last_result.out + assert "95.6%" in runtmp.last_result.out def test_search_containment_abund(runtmp): @@ -1830,28 +2059,34 @@ def test_search_containment_abund(runtmp): mh2.add_many((1, 5)) # build signatures - x = sourmash.SourmashSignature(mh1, name='a') - y = sourmash.SourmashSignature(mh2, name='b') + x = sourmash.SourmashSignature(mh1, name="a") + y = sourmash.SourmashSignature(mh2, name="b") # save! - with open(runtmp.output('a.sig'), 'wt') as fp: + with open(runtmp.output("a.sig"), "w") as fp: sourmash.save_signatures([x], fp) - with open(runtmp.output('b.sig'), 'wt') as fp: + with open(runtmp.output("b.sig"), "w") as fp: sourmash.save_signatures([y], fp) # run sourmash search --containment with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('search', 'a.sig', 'b.sig', '-o', 'xxx.csv', - '--containment') + runtmp.sourmash("search", "a.sig", "b.sig", "-o", "xxx.csv", "--containment") - assert "ERROR: cannot do containment searches on an abund signature; maybe specify --ignore-abundance?" in str(exc) + assert ( + "ERROR: cannot do containment searches on an abund signature; maybe specify --ignore-abundance?" + in str(exc) + ) # run sourmash search --max-containment with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('search', 'a.sig', 'b.sig', '-o', 'xxx.csv', - '--max-containment') + runtmp.sourmash( + "search", "a.sig", "b.sig", "-o", "xxx.csv", "--max-containment" + ) - assert "ERROR: cannot do containment searches on an abund signature; maybe specify --ignore-abundance?" in str(exc) + assert ( + "ERROR: cannot do containment searches on an abund signature; maybe specify --ignore-abundance?" 
+ in str(exc) + ) def test_search_containment_abund_ignore(runtmp): @@ -1868,25 +2103,32 @@ def test_search_containment_abund_ignore(runtmp): mh2.add_many((1, 5)) # build signatures - x = sourmash.SourmashSignature(mh1, name='a') - y = sourmash.SourmashSignature(mh2, name='b') + x = sourmash.SourmashSignature(mh1, name="a") + y = sourmash.SourmashSignature(mh2, name="b") # save! - with open(runtmp.output('a.sig'), 'wt') as fp: + with open(runtmp.output("a.sig"), "w") as fp: sourmash.save_signatures([x], fp) - with open(runtmp.output('b.sig'), 'wt') as fp: + with open(runtmp.output("b.sig"), "w") as fp: sourmash.save_signatures([y], fp) # run sourmash search - runtmp.sourmash('search', 'a.sig', 'b.sig', '-o', 'xxx.csv', - '--containment', '--ignore-abundance') + runtmp.sourmash( + "search", + "a.sig", + "b.sig", + "-o", + "xxx.csv", + "--containment", + "--ignore-abundance", + ) # check results - with open(runtmp.output('xxx.csv'), 'rt') as fp: + with open(runtmp.output("xxx.csv")) as fp: r = csv.DictReader(fp) row = next(r) - similarity = row['similarity'] - print(f'search output: similarity is {similarity}') + similarity = row["similarity"] + print(f"search output: similarity is {similarity}") print(mh1.contained_by(mh2)) assert float(similarity) == mh1.contained_by(mh2) @@ -1895,150 +2137,154 @@ def test_search_containment_abund_ignore(runtmp): def test_search_containment_sbt(runtmp): # search with --containment in an SBT - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'scaled=1', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=1", testdata1, testdata2) - runtmp.sourmash('index', '-k', '31', 'zzz', 'short2.fa.sig') + runtmp.sourmash("index", "-k", "31", "zzz", "short2.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('search', 'short.fa.sig', 'zzz', '--containment') + runtmp.sourmash("search", "short.fa.sig", "zzz", "--containment") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '1 matches' in runtmp.last_result.out - assert '95.6%' in runtmp.last_result.out + assert "1 matches" in runtmp.last_result.out + assert "95.6%" in runtmp.last_result.out def test_search_containment_s10(runtmp): # check --containment for s10/s10-small - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/genome-s10-small.fa.gz.sig') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/genome-s10-small.fa.gz.sig") - runtmp.sourmash('search', q1, q2, '--containment') + runtmp.sourmash("search", q1, q2, "--containment") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '1 matches' in runtmp.last_result.out - assert '16.7%' in runtmp.last_result.out + assert "1 matches" in runtmp.last_result.out + assert "16.7%" in runtmp.last_result.out def test_search_containment_s10_no_max(run): # check --containment for s10/s10-small - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/genome-s10-small.fa.gz.sig') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/genome-s10-small.fa.gz.sig") - with pytest.raises(SourmashCommandFailed) as exc: - run.run_sourmash('search', q1, q2, '--containment', - 
'--max-containment') + with pytest.raises(SourmashCommandFailed): + run.run_sourmash("search", q1, q2, "--containment", "--max-containment") print(run.last_result.out) print(run.last_result.err) - assert "ERROR: cannot specify both --containment and --max-containment!" in run.last_result.err + assert ( + "ERROR: cannot specify both --containment and --max-containment!" + in run.last_result.err + ) def test_search_max_containment_s10_pairwise(runtmp): # check --max-containment for s10/s10-small - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/genome-s10-small.fa.gz.sig') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/genome-s10-small.fa.gz.sig") - runtmp.sourmash('search', q1, q2,'--max-containment') + runtmp.sourmash("search", q1, q2, "--max-containment") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '1 matches' in runtmp.last_result.out - assert '100.0%' in runtmp.last_result.out + assert "1 matches" in runtmp.last_result.out + assert "100.0%" in runtmp.last_result.out def test_search_containment_s10_siglist(runtmp): # check --containment for s10/s10-small - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/*.sig') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/*.sig") q2 = glob.glob(q2) - runtmp.sourmash('search', q1, *q2, '--containment') + runtmp.sourmash("search", q1, *q2, "--containment") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '3 matches' in runtmp.last_result.out - assert ' 16.7% ../genome-s10-small.fa.gz' in runtmp.last_result.out - assert '100.0% ../genome-s10.fa.gz' in runtmp.last_result.out - assert '100.0% ../genome-s10+s11.fa.gz' in runtmp.last_result.out + assert "3 matches" in runtmp.last_result.out + assert " 16.7% ../genome-s10-small.fa.gz" in runtmp.last_result.out + assert "100.0% ../genome-s10.fa.gz" in runtmp.last_result.out + assert "100.0% ../genome-s10+s11.fa.gz" in runtmp.last_result.out def test_search_max_containment_s10_siglist(runtmp): # check --max-containment for s10/s10-small - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/*.sig') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/*.sig") q2 = glob.glob(q2) - runtmp.sourmash('search', q1, *q2, '--max-containment') + runtmp.sourmash("search", q1, *q2, "--max-containment") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '3 matches' in runtmp.last_result.out - assert '100.0% ../genome-s10-small.fa.gz' in runtmp.last_result.out - assert '100.0% ../genome-s10.fa.gz' in runtmp.last_result.out - assert '100.0% ../genome-s10+s11.fa.gz' in runtmp.last_result.out + assert "3 matches" in runtmp.last_result.out + assert "100.0% ../genome-s10-small.fa.gz" in runtmp.last_result.out + assert "100.0% ../genome-s10.fa.gz" in runtmp.last_result.out + assert "100.0% ../genome-s10+s11.fa.gz" in runtmp.last_result.out def test_search_containment_s10_sbt(runtmp): # check --containment for s10/s10-small - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/all.sbt.zip') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/all.sbt.zip") - runtmp.sourmash('search', q1, q2, '--containment') + runtmp.sourmash("search", q1, q2, "--containment") 
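# Note on the expected values in these s10/s10-small tests: --containment
# reports the fraction of the *query* found in the match, so the small
# subset genome scores 16.7%; --max-containment instead takes the larger
# of the two directional containments, so the same pair reports 100.0%
# because genome-s10-small is entirely contained in genome-s10.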
print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '3 matches' in runtmp.last_result.out - assert '100.0% ../genome-s10+s11.fa.gz' in runtmp.last_result.out - assert '100.0% ../genome-s10.fa.gz' in runtmp.last_result.out - assert ' 16.7% ../genome-s10-small.fa.gz' in runtmp.last_result.out + assert "3 matches" in runtmp.last_result.out + assert "100.0% ../genome-s10+s11.fa.gz" in runtmp.last_result.out + assert "100.0% ../genome-s10.fa.gz" in runtmp.last_result.out + assert " 16.7% ../genome-s10-small.fa.gz" in runtmp.last_result.out def test_search_containment_s10_sbt_best_only(runtmp): # check --containment for s10/s10-small - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/all.sbt.zip') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/all.sbt.zip") - runtmp.sourmash('search', q1, q2, '--containment', '--best-only') + runtmp.sourmash("search", q1, q2, "--containment", "--best-only") print(runtmp.last_result.out) print(runtmp.last_result.err) - assert '100.0% ' in runtmp.last_result.out # there are at least two perfect matches! + assert ( + "100.0% " in runtmp.last_result.out + ) # there are at least two perfect matches! assert runtmp.last_result.status == 0 def test_search_containment_s10_sbt_empty(runtmp): # check --containment for s10/s10-small at absurd scaled/empty mh - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/all.sbt.zip') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/all.sbt.zip") - runtmp.sourmash('search', q1, q2, '--scaled', '1e7', '--containment') + runtmp.sourmash("search", q1, q2, "--scaled", "1e7", "--containment") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '0 matches' in runtmp.last_result.out + assert "0 matches" in runtmp.last_result.out def test_search_max_containment_s10_sbt(runtmp): # check --max-containment for s10/s10-small - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/all.sbt.zip') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/all.sbt.zip") - runtmp.sourmash('search', q1, q2, '--max-containment') + runtmp.sourmash("search", q1, q2, "--max-containment") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '3 matches' in runtmp.last_result.out - assert '100.0% ../genome-s10-small.fa.gz' in runtmp.last_result.out - assert '100.0% ../genome-s10.fa.gz' in runtmp.last_result.out - assert '100.0% ../genome-s10+s11.fa.gz' in runtmp.last_result.out + assert "3 matches" in runtmp.last_result.out + assert "100.0% ../genome-s10-small.fa.gz" in runtmp.last_result.out + assert "100.0% ../genome-s10.fa.gz" in runtmp.last_result.out + assert "100.0% ../genome-s10+s11.fa.gz" in runtmp.last_result.out def test_search_max_containment_s10_sbt_best_only(runtmp): # check --max-containment for s10/s10-small - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/all.sbt.zip') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/all.sbt.zip") - runtmp.sourmash('search', q1, q2, '--max-containment', '--best-only') + runtmp.sourmash("search", q1, q2, "--max-containment", "--best-only") print(runtmp.last_result.out) print(runtmp.last_result.err) @@ -2048,120 +2294,142 @@ def 
test_search_max_containment_s10_sbt_best_only(runtmp): def test_search_max_containment_s10_sbt_empty(runtmp): # check --max-containment for s10/s10-small at absurd scaled/empty mh. - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/all.sbt.zip') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/all.sbt.zip") - runtmp.sourmash('search', q1, q2, '--scaled', '1e7', '--max-containment') + runtmp.sourmash("search", q1, q2, "--scaled", "1e7", "--max-containment") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '0 matches' in runtmp.last_result.out + assert "0 matches" in runtmp.last_result.out def test_search_containment_s10_lca(runtmp): # check --containment for s10/s10-small - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/all.lca.json') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/all.lca.json") - runtmp.sourmash('search', q1, q2, '--containment') + runtmp.sourmash("search", q1, q2, "--containment") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '3 matches' in runtmp.last_result.out - assert '100.0% 455c2f95' in runtmp.last_result.out - assert '100.0% 684aa226' in runtmp.last_result.out - assert ' 16.7% 7f7835d2' in runtmp.last_result.out + assert "3 matches" in runtmp.last_result.out + assert "100.0% 455c2f95" in runtmp.last_result.out + assert "100.0% 684aa226" in runtmp.last_result.out + assert " 16.7% 7f7835d2" in runtmp.last_result.out def test_search_max_containment_s10_lca(runtmp): # check --max-containment for s10/s10-small - q1 = utils.get_test_data('scaled/genome-s10.fa.gz.sig') - q2 = utils.get_test_data('scaled/all.lca.json') + q1 = utils.get_test_data("scaled/genome-s10.fa.gz.sig") + q2 = utils.get_test_data("scaled/all.lca.json") - runtmp.sourmash('search', q1, q2, '--max-containment') + runtmp.sourmash("search", q1, q2, "--max-containment") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '3 matches' in runtmp.last_result.out - assert '100.0% 455c2f95' in runtmp.last_result.out - assert '100.0% 684aa226' in runtmp.last_result.out - assert '100.0% 7f7835d2' in runtmp.last_result.out + assert "3 matches" in runtmp.last_result.out + assert "100.0% 455c2f95" in runtmp.last_result.out + assert "100.0% 684aa226" in runtmp.last_result.out + assert "100.0% 7f7835d2" in runtmp.last_result.out def test_search_gzip(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna','-p','k=31,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - data = Path(runtmp.output('short.fa.sig')).read_bytes() - with gzip.open(runtmp.output('zzz.gz'), 'wb') as fp: + data = Path(runtmp.output("short.fa.sig")).read_bytes() + with gzip.open(runtmp.output("zzz.gz"), "wb") as fp: fp.write(data) - data = Path(runtmp.output('short2.fa.sig')).read_bytes() - with gzip.open(runtmp.output('yyy.gz'), 'wb') as fp: + data = Path(runtmp.output("short2.fa.sig")).read_bytes() + with gzip.open(runtmp.output("yyy.gz"), "wb") as fp: fp.write(data) - runtmp.sourmash('search', 'zzz.gz', 'yyy.gz') + runtmp.sourmash("search", "zzz.gz", "yyy.gz") print(runtmp.last_result.status, runtmp.last_result.out, 
runtmp.last_result.err) - assert '1 matches' in runtmp.last_result.out - assert '93.0%' in runtmp.last_result.out + assert "1 matches" in runtmp.last_result.out + assert "93.0%" in runtmp.last_result.out def test_search_2(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - testdata3 = utils.get_test_data('short3.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + testdata3 = utils.get_test_data("short3.fa") - runtmp.sourmash('sketch','dna','-p','k=31,num=500', testdata1, testdata2, testdata3) + runtmp.sourmash( + "sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2, testdata3 + ) - runtmp.sourmash('search', 'short.fa.sig', 'short2.fa.sig', 'short3.fa.sig') + runtmp.sourmash("search", "short.fa.sig", "short2.fa.sig", "short3.fa.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '2 matches' in runtmp.last_result.out - assert '93.0%' in runtmp.last_result.out - assert '89.6%' in runtmp.last_result.out + assert "2 matches" in runtmp.last_result.out + assert "93.0%" in runtmp.last_result.out + assert "89.6%" in runtmp.last_result.out def test_search_3(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - testdata3 = utils.get_test_data('short3.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + testdata3 = utils.get_test_data("short3.fa") - runtmp.sourmash('sketch','dna','-p','k=31,num=500', testdata1, testdata2, testdata3) + runtmp.sourmash( + "sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2, testdata3 + ) - runtmp.sourmash('search', '-n', '1', 'short.fa.sig', 'short2.fa.sig', 'short3.fa.sig') + runtmp.sourmash( + "search", "-n", "1", "short.fa.sig", "short2.fa.sig", "short3.fa.sig" + ) print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '2 matches above threshold 0.080; showing first 1:' in runtmp.last_result.out + assert "2 matches above threshold 0.080; showing first 1:" in runtmp.last_result.out def test_search_4(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - testdata3 = utils.get_test_data('short3.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + testdata3 = utils.get_test_data("short3.fa") - runtmp.sourmash('sketch','dna','-p','k=31,num=500', testdata1, testdata2, testdata3) + runtmp.sourmash( + "sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2, testdata3 + ) - runtmp.sourmash('search', '-n', '0', 'short.fa.sig', 'short2.fa.sig', 'short3.fa.sig') + runtmp.sourmash( + "search", "-n", "0", "short.fa.sig", "short2.fa.sig", "short3.fa.sig" + ) print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '2 matches above threshold 0.080:' in runtmp.last_result.out - assert 'short2.fa' in runtmp.last_result.out - assert 'short3.fa' in runtmp.last_result.out + assert "2 matches above threshold 0.080:" in runtmp.last_result.out + assert "short2.fa" in runtmp.last_result.out + assert "short3.fa" in runtmp.last_result.out def test_search_5_num_results(runtmp): - query = utils.get_test_data('gather/combined.sig') - against = glob.glob(utils.get_test_data('gather/GCF*.sig')) + query = utils.get_test_data("gather/combined.sig") + against = glob.glob(utils.get_test_data("gather/GCF*.sig")) - runtmp.sourmash('search', '-n', '5', query, *against) + 
runtmp.sourmash("search", "-n", "5", query, *against) print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '12 matches above threshold 0.080; showing first 5:' in runtmp.last_result.out + assert ( + "12 matches above threshold 0.080; showing first 5:" in runtmp.last_result.out + ) def test_index_check_scaled_bounds_negative(runtmp): with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig', '-k', '31', '--scaled', '-5', '--dna') + runtmp.sourmash( + "index", + "zzz", + "short.fa.sig", + "short2.fa.sig", + "-k", + "31", + "--scaled", + "-5", + "--dna", + ) print(runtmp.last_result.err) @@ -2170,37 +2438,70 @@ def test_index_check_scaled_bounds_negative(runtmp): def test_index_check_scaled_bounds_less_than_minimum(runtmp): with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig', '-k', '31', '--scaled', '50', '--dna') - - assert "WARNING: scaled value should be >= 100. Continuing anyway." in runtmp.last_result.err + runtmp.sourmash( + "index", + "zzz", + "short.fa.sig", + "short2.fa.sig", + "-k", + "31", + "--scaled", + "50", + "--dna", + ) + + assert ( + "WARNING: scaled value should be >= 100. Continuing anyway." + in runtmp.last_result.err + ) def test_index_check_scaled_bounds_more_than_maximum(runtmp): with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig', '-k', '31', '--scaled', '1e9', '--dna') - - assert "WARNING: scaled value should be <= 1e6. Continuing anyway." in runtmp.last_result.err + runtmp.sourmash( + "index", + "zzz", + "short.fa.sig", + "short2.fa.sig", + "-k", + "31", + "--scaled", + "1e9", + "--dna", + ) + + assert ( + "WARNING: scaled value should be <= 1e6. Continuing anyway." + in runtmp.last_result.err + ) @utils.in_tempdir def test_index_metagenome_fromfile(c): # test index --from-file - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") # construct a file list - with open(c.output('sig.list'), 'wt') as fp: + with open(c.output("sig.list"), "w") as fp: fp.write("\n".join(testdata_sigs)) - cmd = ['index', 'gcf_all', testdata_sigs[0], '-k', '21', - '--from-file', c.output('sig.list')] + cmd = [ + "index", + "gcf_all", + testdata_sigs[0], + "-k", + "21", + "--from-file", + c.output("sig.list"), + ] c.run_sourmash(*cmd) - assert os.path.exists(c.output('gcf_all.sbt.zip')) + assert os.path.exists(c.output("gcf_all.sbt.zip")) - cmd = 'search {} gcf_all -k 21'.format(query_sig) + cmd = f"search {query_sig} gcf_all -k 21" cmd = cmd.split() c.run_sourmash(*cmd) @@ -2208,28 +2509,31 @@ def test_index_metagenome_fromfile(c): print(out) print(c.last_result.err) - assert ' 33.2% NC_003198.1 Salmonella enterica subsp. enterica serovar T...' in out - assert '12 matches above threshold 0.080; showing first 3:' in out + assert ( + " 33.2% NC_003198.1 Salmonella enterica subsp. enterica serovar T..." 
+ in out + ) + assert "12 matches above threshold 0.080; showing first 3:" in out + @utils.in_tempdir def test_index_metagenome_fromfile_no_cmdline_sig(c): # test index --from-file - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") # construct a file list - with open(c.output('sig.list'), 'wt') as fp: + with open(c.output("sig.list"), "w") as fp: fp.write("\n".join(testdata_sigs)) - cmd = ['index', 'gcf_all', '-k', '21', - '--from-file', c.output('sig.list')] + cmd = ["index", "gcf_all", "-k", "21", "--from-file", c.output("sig.list")] c.run_sourmash(*cmd) - assert os.path.exists(c.output('gcf_all.sbt.zip')) + assert os.path.exists(c.output("gcf_all.sbt.zip")) - cmd = 'search {} gcf_all -k 21'.format(query_sig) + cmd = f"search {query_sig} gcf_all -k 21" cmd = cmd.split() c.run_sourmash(*cmd) @@ -2237,81 +2541,98 @@ def test_index_metagenome_fromfile_no_cmdline_sig(c): print(out) print(c.last_result.err) - assert ' 33.2% NC_003198.1 Salmonella enterica subsp. enterica serovar T' in out - assert '12 matches above threshold 0.080; showing first 3:' in out + assert ( + " 33.2% NC_003198.1 Salmonella enterica subsp. enterica serovar T" in out + ) + assert "12 matches above threshold 0.080; showing first 3:" in out def test_search_metagenome(runtmp): - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) runtmp.sourmash(*cmd) - assert os.path.exists(runtmp.output('gcf_all.sbt.zip')) + assert os.path.exists(runtmp.output("gcf_all.sbt.zip")) - runtmp.sourmash('search', query_sig, 'gcf_all', '-k', '21') + runtmp.sourmash("search", query_sig, "gcf_all", "-k", "21") print(runtmp.last_result.out) print(runtmp.last_result.err) - assert ' 33.2% NC_003198.1 Salmonella enterica subsp. enterica serovar T' in runtmp.last_result.out - assert '12 matches above threshold 0.080; showing first 3:' in runtmp.last_result.out + assert ( + " 33.2% NC_003198.1 Salmonella enterica subsp. enterica serovar T" + in runtmp.last_result.out + ) + assert ( + "12 matches above threshold 0.080; showing first 3:" in runtmp.last_result.out + ) def test_search_metagenome_traverse(runtmp): - testdata_dir = utils.get_test_data('gather') + testdata_dir = utils.get_test_data("gather") - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - runtmp.sourmash('search', query_sig, testdata_dir, '-k', '21') + runtmp.sourmash("search", query_sig, testdata_dir, "-k", "21") print(runtmp.last_result.out) print(runtmp.last_result.err) - assert ' 33.2% NC_003198.1 Salmonella enterica subsp. enterica serovar T' in runtmp.last_result.out - assert '13 matches above threshold 0.080; showing first 3:' in runtmp.last_result.out + assert ( + " 33.2% NC_003198.1 Salmonella enterica subsp. 
enterica serovar T" + in runtmp.last_result.out + ) + assert ( + "13 matches above threshold 0.080; showing first 3:" in runtmp.last_result.out + ) def test_search_metagenome_traverse_check_csv(runtmp): # this test confirms that the CSV 'filename' output for signatures loaded # via directory traversal properly contains the actual path to the # signature file from which the signature was loaded. - testdata_dir = utils.get_test_data('gather') + testdata_dir = utils.get_test_data("gather") - query_sig = utils.get_test_data('gather/combined.sig') - out_csv = runtmp.output('out.csv') + query_sig = utils.get_test_data("gather/combined.sig") + out_csv = runtmp.output("out.csv") - runtmp.sourmash('search', query_sig, testdata_dir, '-k', '21', '-o', out_csv) + runtmp.sourmash("search", query_sig, testdata_dir, "-k", "21", "-o", out_csv) print(runtmp.last_result.out) print(runtmp.last_result.err) - with open(out_csv, 'rt') as fp: + with open(out_csv) as fp: prefix_len = len(testdata_dir) r = csv.DictReader(fp) for row in r: print(row) - filename = row['filename'] + filename = row["filename"] assert filename.startswith(testdata_dir), filename # should have full path to file sig was loaded from assert len(filename) > prefix_len - assert ' 33.2% NC_003198.1 Salmonella enterica subsp. enterica serovar T' in runtmp.last_result.out - assert '13 matches above threshold 0.080; showing first 3:' in runtmp.last_result.out + assert ( + " 33.2% NC_003198.1 Salmonella enterica subsp. enterica serovar T" + in runtmp.last_result.out + ) + assert ( + "13 matches above threshold 0.080; showing first 3:" in runtmp.last_result.out + ) @utils.in_thisdir def test_search_incompatible(c): - num_sig = utils.get_test_data('num/47.fa.sig') - scaled_sig = utils.get_test_data('47.fa.sig') + num_sig = utils.get_test_data("num/47.fa.sig") + scaled_sig = utils.get_test_data("47.fa.sig") - with pytest.raises(SourmashCommandFailed) as exc: + with pytest.raises(SourmashCommandFailed): c.run_sourmash("search", scaled_sig, num_sig, fail_ok=True) assert c.last_result.status != 0 print(c.last_result.out) @@ -2324,52 +2645,61 @@ def test_search_incompatible(c): def test_search_traverse_incompatible(c): # build a directory with some signatures in it, search for compatible # signatures. 
- searchdir = c.output('searchme') + searchdir = c.output("searchme") os.mkdir(searchdir) - num_sig = utils.get_test_data('num/47.fa.sig') - scaled_sig = utils.get_test_data('47.fa.sig') - shutil.copyfile(num_sig, c.output('searchme/num.sig')) - shutil.copyfile(scaled_sig, c.output('searchme/scaled.sig')) + num_sig = utils.get_test_data("num/47.fa.sig") + scaled_sig = utils.get_test_data("47.fa.sig") + shutil.copyfile(num_sig, c.output("searchme/num.sig")) + shutil.copyfile(scaled_sig, c.output("searchme/scaled.sig")) - c.run_sourmash("search", scaled_sig, c.output('searchme')) - assert '100.0% NC_009665.1 Shewanella baltica OS185, complete genome' in c.last_result.out + c.run_sourmash("search", scaled_sig, c.output("searchme")) + assert ( + "100.0% NC_009665.1 Shewanella baltica OS185, complete genome" + in c.last_result.out + ) def test_search_check_scaled_bounds_negative(runtmp): - testdata_glob = utils.get_test_data('gather/GCF*.sig') - testdata_sigs = glob.glob(testdata_glob) + testdata_glob = utils.get_test_data("gather/GCF*.sig") + glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', query_sig, 'gcf_all', '-k', '21', '--scaled', '-5') + runtmp.sourmash("search", query_sig, "gcf_all", "-k", "21", "--scaled", "-5") assert "ERROR: scaled value must be positive" in runtmp.last_result.err def test_search_check_scaled_bounds_less_than_minimum(runtmp): - testdata_glob = utils.get_test_data('gather/GCF*.sig') - testdata_sigs = glob.glob(testdata_glob) + testdata_glob = utils.get_test_data("gather/GCF*.sig") + glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', query_sig, 'gcf_all', '-k', '21', '--scaled', '50') + runtmp.sourmash("search", query_sig, "gcf_all", "-k", "21", "--scaled", "50") - assert "WARNING: scaled value should be >= 100. Continuing anyway." in runtmp.last_result.err + assert ( + "WARNING: scaled value should be >= 100. Continuing anyway." + in runtmp.last_result.err + ) def test_search_check_scaled_bounds_more_than_maximum(runtmp): - testdata_glob = utils.get_test_data('gather/GCF*.sig') - testdata_sigs = glob.glob(testdata_glob) + testdata_glob = utils.get_test_data("gather/GCF*.sig") + glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', query_sig, 'gcf_all', '-k', '21', '--scaled', '1e9') + runtmp.sourmash("search", query_sig, "gcf_all", "-k", "21", "--scaled", "1e9") - assert "WARNING: scaled value should be <= 1e6. Continuing anyway." in runtmp.last_result.err + assert ( + "WARNING: scaled value should be <= 1e6. Continuing anyway." + in runtmp.last_result.err + ) # explanation: you cannot downsample a scaled SBT to match a scaled @@ -2377,77 +2707,108 @@ def test_search_check_scaled_bounds_more_than_maximum(runtmp): # (you *can* downsample a signature to match an SBT.) 
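The asymmetry described in the comment above is easy to see in the Python
API. A minimal sketch (illustrative only; the hash values are made up, and
the exact exception raised on an invalid downsample is an assumption based
on sourmash 4.x behavior):

    import sourmash

    mh = sourmash.MinHash(n=0, ksize=21, scaled=10000)  # scaled sketch
    mh.add_many(range(0, 10**6, 97))  # hypothetical hash values

    # a sketch can always be downsampled to a coarser (larger) scaled value:
    coarse = mh.downsample(scaled=100000)
    assert coarse.scaled == 100000

    # ...but not to a finer (smaller) one -- the discarded hashes are gone:
    try:
        mh.downsample(scaled=1000)
    except ValueError:
        pass  # sourmash refuses rather than invent missing hashes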
def test_search_metagenome_sbt_downsample_fail(runtmp): # test downsample on SBT => failure, with --fail-on-empty-databases - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) runtmp.sourmash(*cmd) - assert os.path.exists(runtmp.output('gcf_all.sbt.zip')) + assert os.path.exists(runtmp.output("gcf_all.sbt.zip")) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', query_sig, 'gcf_all', '-k', '21', '--scaled', '100000') + runtmp.sourmash( + "search", query_sig, "gcf_all", "-k", "21", "--scaled", "100000" + ) print(runtmp.last_result.out) print(runtmp.last_result.err) assert runtmp.last_result.status == -1 assert "ERROR: cannot use 'gcf_all' for this query." in runtmp.last_result.err - assert "search scaled value 100000 is less than database scaled value of 10000" in runtmp.last_result.err + assert ( + "search scaled value 100000 is less than database scaled value of 10000" + in runtmp.last_result.err + ) def test_search_metagenome_sbt_downsample_nofail(runtmp): # test downsample on SBT => failure but ok with --no-fail-on-empty-database - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) runtmp.sourmash(*cmd) - assert os.path.exists(runtmp.output('gcf_all.sbt.zip')) + assert os.path.exists(runtmp.output("gcf_all.sbt.zip")) - runtmp.sourmash('search', query_sig, 'gcf_all', '-k', '21', '--scaled', '100000', '--no-fail-on-empty-database') + runtmp.sourmash( + "search", + query_sig, + "gcf_all", + "-k", + "21", + "--scaled", + "100000", + "--no-fail-on-empty-database", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) assert runtmp.last_result.status == 0 assert "ERROR: cannot use 'gcf_all' for this query." 
in runtmp.last_result.err - assert "search scaled value 100000 is less than database scaled value of 10000" in runtmp.last_result.err + assert ( + "search scaled value 100000 is less than database scaled value of 10000" + in runtmp.last_result.err + ) assert "0 matches" in runtmp.last_result.out def test_search_metagenome_downsample_containment(runtmp): - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) runtmp.sourmash(*cmd) - assert os.path.exists(runtmp.output('gcf_all.sbt.zip')) + assert os.path.exists(runtmp.output("gcf_all.sbt.zip")) - runtmp.sourmash('search', query_sig, 'gcf_all', '-k', '21', '--scaled', '100000', '--containment') + runtmp.sourmash( + "search", + query_sig, + "gcf_all", + "-k", + "21", + "--scaled", + "100000", + "--containment", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert ' 32.9% NC_003198.1 Salmonella enterica subsp. enterica serovar T' in runtmp.last_result.out - assert '12 matches above threshold 0.080; showing first 3:' in runtmp.last_result.out + assert ( + " 32.9% NC_003198.1 Salmonella enterica subsp. enterica serovar T" + in runtmp.last_result.out + ) + assert ( + "12 matches above threshold 0.080; showing first 3:" in runtmp.last_result.out + ) @utils.in_tempdir @@ -2455,36 +2816,46 @@ def test_search_metagenome_downsample_index(c): # does same search as search_metagenome_downsample_containment but # rescales during indexing - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") # downscale during indexing, rather than during search. - c.run_sourmash('index', 'gcf_all', *testdata_sigs, '-k', '21', - '--scaled', '100000') + c.run_sourmash("index", "gcf_all", *testdata_sigs, "-k", "21", "--scaled", "100000") - assert os.path.exists(c.output('gcf_all.sbt.zip')) + assert os.path.exists(c.output("gcf_all.sbt.zip")) - c.run_sourmash('search', query_sig, 'gcf_all', '-k', '21', - '--containment') + c.run_sourmash("search", query_sig, "gcf_all", "-k", "21", "--containment") print(c) - assert ' 32.9% NC_003198.1 Salmonella enterica subsp. enterica serovar T' in str( - c) - assert ' 29.7% NC_003197.2 Salmonella enterica subsp. enterica serovar T' in str( - c) - assert '12 matches above threshold 0.080; showing first 3:' in str(c) + assert ( + " 32.9% NC_003198.1 Salmonella enterica subsp. enterica serovar T" + in str(c) + ) + assert ( + " 29.7% NC_003197.2 Salmonella enterica subsp. 
enterica serovar T" + in str(c) + ) + assert "12 matches above threshold 0.080; showing first 3:" in str(c) def test_search_with_picklist(runtmp): # test 'sourmash search' with picklists - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') - - runtmp.sourmash('search', metag_sig, *gcf_sigs, '--containment', - '-k', '21', '--picklist', f"{picklist}:md5:md5") + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") + + runtmp.sourmash( + "search", + metag_sig, + *gcf_sigs, + "--containment", + "-k", + "21", + "--picklist", + f"{picklist}:md5:md5", + ) err = runtmp.last_result.err print(err) @@ -2502,12 +2873,20 @@ def test_search_with_picklist(runtmp): def test_search_with_picklist_exclude(runtmp): # test 'sourmash search' with picklists - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') - - runtmp.sourmash('search', metag_sig, *gcf_sigs, '--containment', - '-k', '21', '--picklist', f"{picklist}:md5:md5:exclude") + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") + + runtmp.sourmash( + "search", + metag_sig, + *gcf_sigs, + "--containment", + "-k", + "21", + "--picklist", + f"{picklist}:md5:md5:exclude", + ) err = runtmp.last_result.err print(err) @@ -2524,11 +2903,19 @@ def test_search_with_picklist_exclude(runtmp): def test_search_with_pattern_include(runtmp): # test 'sourmash search' with --include-db-pattern - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - - runtmp.sourmash('search', metag_sig, *gcf_sigs, '--containment', - '-k', '21', '--include', "thermotoga") + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + + runtmp.sourmash( + "search", + metag_sig, + *gcf_sigs, + "--containment", + "-k", + "21", + "--include", + "thermotoga", + ) err = runtmp.last_result.err print(err) @@ -2543,11 +2930,19 @@ def test_search_with_pattern_include(runtmp): def test_search_with_pattern_exclude(runtmp): # test 'sourmash search' with --exclude-db-pattern - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - - runtmp.sourmash('search', metag_sig, *gcf_sigs, '--containment', - '-k', '21', '--exclude', "thermotoga") + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + + runtmp.sourmash( + "search", + metag_sig, + *gcf_sigs, + "--containment", + "-k", + "21", + "--exclude", + "thermotoga", + ) err = runtmp.last_result.err print(err) @@ -2562,13 +2957,12 @@ def test_search_with_pattern_exclude(runtmp): def test_search_empty_db_fail(runtmp): # search should fail on empty db with --fail-on-empty-database - query = utils.get_test_data('2.fa.sig') - against = utils.get_test_data('47.fa.sig') - against2 = utils.get_test_data('lca/47+63.lca.json') + query = utils.get_test_data("2.fa.sig") + against = utils.get_test_data("47.fa.sig") + against2 = utils.get_test_data("lca/47+63.lca.json") with 
pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', query, against, against2, '-k', '51') - + runtmp.sourmash("search", query, against, against2, "-k", "51") err = runtmp.last_result.err assert "no compatible signatures found in " in err @@ -2576,12 +2970,13 @@ def test_search_empty_db_fail(runtmp): def test_search_empty_db_nofail(runtmp): # search should not fail on empty db with --no-fail-on-empty-database - query = utils.get_test_data('2.fa.sig') - against = utils.get_test_data('47.fa.sig') - against2 = utils.get_test_data('lca/47+63.lca.json') + query = utils.get_test_data("2.fa.sig") + against = utils.get_test_data("47.fa.sig") + against2 = utils.get_test_data("lca/47+63.lca.json") - runtmp.sourmash('search', query, against, against2, '-k', '51', - '--no-fail-on-empty-data') + runtmp.sourmash( + "search", query, against, against2, "-k", "51", "--no-fail-on-empty-data" + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -2589,206 +2984,239 @@ def test_search_empty_db_nofail(runtmp): print(err) assert "no compatible signatures found in " in err - assert "ksize on this database is 31; this is different from requested ksize of 51" in err + assert ( + "ksize on this database is 31; this is different from requested ksize of 51" + in err + ) assert "loaded 50 total signatures from 2 locations" in err assert "after selecting signatures compatible with search, 0 remain." in err def test_mash_csv_to_sig(runtmp): - testdata1 = utils.get_test_data('short.fa.msh.dump') - testdata2 = utils.get_test_data('short.fa') + testdata1 = utils.get_test_data("short.fa.msh.dump") + testdata2 = utils.get_test_data("short.fa") - runtmp.sourmash('import_csv', testdata1, '-o', 'xxx.sig') + runtmp.sourmash("import_csv", testdata1, "-o", "xxx.sig") - runtmp.sourmash('sketch', 'dna', '-p','k=31,num=970',testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=970", testdata2) - runtmp.sourmash('search', '-k', '31', 'short.fa.sig', 'xxx.sig') + runtmp.sourmash("search", "-k", "31", "short.fa.sig", "xxx.sig") print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err) - assert '1 matches' in runtmp.last_result.out - assert '100.0% short.fa' in runtmp.last_result.out + assert "1 matches" in runtmp.last_result.out + assert "100.0% short.fa" in runtmp.last_result.out def test_do_sourmash_index_bad_args(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna','-p','k=31,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig', '-k', '31', '--dna', '--protein') + runtmp.sourmash( + "index", + "zzz", + "short.fa.sig", + "short2.fa.sig", + "-k", + "31", + "--dna", + "--protein", + ) print(runtmp.last_result.out, runtmp.last_result.err) - assert 'cannot specify more than one of --dna/--rna/--nucleotide/--protein/--hp/--dayhoff' in runtmp.last_result.err + assert ( + "cannot specify more than one of --dna/--rna/--nucleotide/--protein/--hp/--dayhoff" + in runtmp.last_result.err + ) assert runtmp.last_result.status != 0 def test_do_sourmash_sbt_search(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - 
runtmp.sourmash('sketch','dna','-p','k=31,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz", "short.fa.sig", "short2.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('search', 'short.fa.sig', 'zzz') + runtmp.sourmash("search", "short.fa.sig", "zzz") print(runtmp.last_result.out) - assert 'short.fa' in runtmp.last_result.out - assert 'short2.fa' in runtmp.last_result.out + assert "short.fa" in runtmp.last_result.out + assert "short2.fa" in runtmp.last_result.out def test_do_sourmash_sbt_search_wrong_ksize(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'translate', '-p', 'k=31,num=500', '-p', 'k=51,num=500', testdata1, testdata2) + runtmp.sourmash( + "sketch", + "translate", + "-p", + "k=31,num=500", + "-p", + "k=51,num=500", + testdata1, + testdata2, + ) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz", "short.fa.sig", "short2.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', '-k', '51', 'short.fa.sig', 'zzz') + runtmp.sourmash("search", "-k", "51", "short.fa.sig", "zzz") assert runtmp.last_result.status == -1 print(runtmp.last_result.out) print(runtmp.last_result.err) assert "ERROR: cannot use 'zzz' for this query." in runtmp.last_result.err - assert "search ksize 51 is different from database ksize 31" in runtmp.last_result.err + assert ( + "search ksize 51 is different from database ksize 31" in runtmp.last_result.err + ) def test_do_sourmash_sbt_search_multiple(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna','-p','k=31,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz", "short.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('index', 'zzz2', 'short2.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz2", "short2.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz2.sbt.zip')) + assert os.path.exists(runtmp.output("zzz2.sbt.zip")) - runtmp.sourmash('search', 'short.fa.sig', 'zzz', 'zzz2') + runtmp.sourmash("search", "short.fa.sig", "zzz", "zzz2") print(runtmp.last_result.out) - assert 'short.fa' in runtmp.last_result.out - assert 'short2.fa' in runtmp.last_result.out + assert "short.fa" in runtmp.last_result.out + assert "short2.fa" in runtmp.last_result.out def test_do_sourmash_sbt_search_and_sigs(runtmp): # search an SBT and a signature at same time. 
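# (Positional arguments after the query can mix standalone .sig files and
# SBT databases freely; matches from both are merged into one result list,
# which is what the combined short.fa/short2.fa output below checks.)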
- testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna','-p','k=31,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz", "short.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('search', 'short.fa.sig', 'zzz', 'short2.fa.sig') + runtmp.sourmash("search", "short.fa.sig", "zzz", "short2.fa.sig") print(runtmp.last_result.out) - assert 'short.fa' in runtmp.last_result.out - assert 'short2.fa' in runtmp.last_result.out + assert "short.fa" in runtmp.last_result.out + assert "short2.fa" in runtmp.last_result.out def test_do_sourmash_sbt_search_downsample(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'k=31,scaled=10', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,scaled=10", testdata1, testdata2) - testdata1 = utils.get_test_data('short.fa') + testdata1 = utils.get_test_data("short.fa") - runtmp.sourmash('sketch','dna','-p','k=31,scaled=5', '-o', 'query.sig', testdata1) + runtmp.sourmash( + "sketch", "dna", "-p", "k=31,scaled=5", "-o", "query.sig", testdata1 + ) - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "-k", "31", "zzz", "short.fa.sig", "short2.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('search', 'query.sig', 'zzz') + runtmp.sourmash("search", "query.sig", "zzz") print(runtmp.last_result.out) - assert 'short.fa' in runtmp.last_result.out - assert 'short2.fa' in runtmp.last_result.out + assert "short.fa" in runtmp.last_result.out + assert "short2.fa" in runtmp.last_result.out def test_do_sourmash_sbt_search_downsample_2(runtmp): - testdata1 = utils.get_test_data('lca-root/TARA_MED_MAG_00029.fa.sig') - testdata2 = utils.get_test_data('lca-root/TOBG_MED-875.fna.gz.sig') + testdata1 = utils.get_test_data("lca-root/TARA_MED_MAG_00029.fa.sig") + testdata2 = utils.get_test_data("lca-root/TOBG_MED-875.fna.gz.sig") - sbtname = 'foo' + sbtname = "foo" - runtmp.sourmash('index', '-k', '31', sbtname, testdata2) + runtmp.sourmash("index", "-k", "31", sbtname, testdata2) assert runtmp.last_result.status == 0 with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', testdata1, sbtname, '--scaled=100000', '--threshold=0.01') + runtmp.sourmash( + "search", testdata1, sbtname, "--scaled=100000", "--threshold=0.01" + ) assert runtmp.last_result.status == -1 print(runtmp.last_result.out) print(runtmp.last_result.err) assert "ERROR: cannot use 'foo' for this query." in runtmp.last_result.err - assert "search scaled value 100000 is less than database scaled value of 2000" in runtmp.last_result.err + assert ( + "search scaled value 100000 is less than database scaled value of 2000" + in runtmp.last_result.err + ) @utils.in_tempdir def test_do_sourmash_index_abund(c): # 'sourmash index' should flatten signatures w/track_abund. 
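# (SBT internal nodes are presence/absence Bloom filters and cannot carry
# abundances, so indexing flattens each signature; the track_abundance
# True -> False checks below verify exactly that.)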
- testdata2 = utils.get_test_data('lca-root/TOBG_MED-875.fna.gz.sig') + testdata2 = utils.get_test_data("lca-root/TOBG_MED-875.fna.gz.sig") - with open(testdata2, 'rt') as fp: + with open(testdata2): ss = sourmash.load_one_signature(testdata2, ksize=31) assert ss.minhash.track_abundance == True - sbtname = 'foo' + sbtname = "foo" - c.run_sourmash('index', '-k', '31', sbtname, testdata2) + c.run_sourmash("index", "-k", "31", sbtname, testdata2) for kk in sourmash.load_file_as_signatures(c.output(sbtname)): assert kk.minhash.track_abundance == False def test_do_sourmash_index_single(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna','-p','k=31,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig') + runtmp.sourmash("index", "-k", "31", "zzz", "short.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('search', 'short.fa.sig', 'zzz') + runtmp.sourmash("search", "short.fa.sig", "zzz") print(runtmp.last_result.out) - assert 'short.fa' in runtmp.last_result.out + assert "short.fa" in runtmp.last_result.out def test_do_sourmash_sbt_search_selectprot(runtmp): # index should fail when run on signatures with multiple types - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - args = ['sketch', 'dna', '-p', 'k=30,num=500',testdata1, testdata2] + args = ["sketch", "dna", "-p", "k=30,num=500", testdata1, testdata2] runtmp.sourmash(*args) - args = ['index', '-k', '31', 'zzz', 'short.fa.sig', 'short2.fa.sig'] + args = ["index", "-k", "31", "zzz", "short.fa.sig", "short2.fa.sig"] with pytest.raises(SourmashCommandFailed): runtmp.sourmash(*args) @@ -2801,122 +3229,130 @@ def test_do_sourmash_sbt_search_selectprot(runtmp): def test_do_sourmash_search_multimoltype_query(runtmp): # 'search' should fail if multiple sigs are given as query, due to # multiple molecule types. - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") # first, calculate signatures with multiple molecule types - args = ['sketch', 'translate', testdata1, testdata2, - '-p', 'protein', '-p', 'dayhoff'] + args = [ + "sketch", + "translate", + testdata1, + testdata2, + "-p", + "protein", + "-p", + "dayhoff", + ] runtmp.sourmash(*args) # now, index one of 'em - args = ['index', 'zzz', 'short.fa.sig', 'short2.fa.sig', '--protein'] + args = ["index", "zzz", "short.fa.sig", "short2.fa.sig", "--protein"] runtmp.sourmash(*args) # output exists, yes? - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) # now, try searching. Should raise error. 
- args = ['search', 'short.fa.sig', 'zzz'] - with pytest.raises(SourmashCommandFailed) as exc: + args = ["search", "short.fa.sig", "zzz"] + with pytest.raises(SourmashCommandFailed): runtmp.sourmash(*args) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'need exactly one' in runtmp.last_result.err + assert "need exactly one" in runtmp.last_result.err def test_do_sourmash_index_traverse(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna','-p','k=31,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - runtmp.sourmash('index', '-k', '31', 'zzz', '.') + runtmp.sourmash("index", "-k", "31", "zzz", ".") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) - assert 'loaded 2 sigs; saving SBT under' in runtmp.last_result.err + assert os.path.exists(runtmp.output("zzz.sbt.zip")) + assert "loaded 2 sigs; saving SBT under" in runtmp.last_result.err - runtmp.sourmash('search', 'short.fa.sig', 'zzz') + runtmp.sourmash("search", "short.fa.sig", "zzz") print(runtmp.last_result.out) - assert 'short.fa' in runtmp.last_result.out - assert 'short2.fa' in runtmp.last_result.out + assert "short.fa" in runtmp.last_result.out + assert "short2.fa" in runtmp.last_result.out @utils.in_tempdir def test_do_sourmash_index_traverse_force(c): # test loading of files that don't end with .sig with -f - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - outdir = c.output('sigs') + outdir = c.output("sigs") os.mkdir(outdir) - out1 = os.path.join(outdir, 'short1') - out2 = os.path.join(outdir, 'short2') + out1 = os.path.join(outdir, "short1") + out2 = os.path.join(outdir, "short2") - c.run_sourmash('sketch','dna','-p','k=31,scaled=5', '-o', out1, testdata1) - c.run_sourmash('sketch','dna','-p','k=31,scaled=5', '-o', out2, testdata2) + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=5", "-o", out1, testdata1) + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=5", "-o", out2, testdata2) - c.run_sourmash('index', '-k', '31', 'zzz', '.', '-f') + c.run_sourmash("index", "-k", "31", "zzz", ".", "-f") err = c.last_result.err - assert os.path.exists(c.output('zzz.sbt.zip')) - assert 'loaded 2 sigs; saving SBT under' in err + assert os.path.exists(c.output("zzz.sbt.zip")) + assert "loaded 2 sigs; saving SBT under" in err - c.run_sourmash('search', out1, 'zzz') + c.run_sourmash("search", out1, "zzz") out = c.last_result.out print(out) - assert 'short.fa' in out - assert 'short2.fa' in out + assert "short.fa" in out + assert "short2.fa" in out def test_do_sourmash_index_sparseness(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna','-p','k=31,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - runtmp.sourmash('index', '-k', '31', 'zzz.sbt.json', '.', '--sparseness', '1.0') + runtmp.sourmash("index", "-k", "31", "zzz.sbt.json", ".", "--sparseness", "1.0") - assert os.path.exists(runtmp.output('zzz.sbt.json')) - assert 'loaded 2 sigs; saving SBT under' in runtmp.last_result.err + assert 
os.path.exists(runtmp.output("zzz.sbt.json")) + assert "loaded 2 sigs; saving SBT under" in runtmp.last_result.err - runtmp.sourmash('search', 'short.fa.sig', 'zzz.sbt.json') + runtmp.sourmash("search", "short.fa.sig", "zzz.sbt.json") print(runtmp.last_result.out) - assert len(glob.glob(runtmp.output('.sbt.zzz/*'))) == 3 - assert not glob.glob(runtmp.output('.sbt.zzz/*internal*')) + assert len(glob.glob(runtmp.output(".sbt.zzz/*"))) == 3 + assert not glob.glob(runtmp.output(".sbt.zzz/*internal*")) - assert 'short.fa' in runtmp.last_result.out - assert 'short2.fa' in runtmp.last_result.out + assert "short.fa" in runtmp.last_result.out + assert "short2.fa" in runtmp.last_result.out def test_do_sourmash_sbt_combine(runtmp): files = [utils.get_test_data(f) for f in utils.SIG_FILES] - runtmp.sourmash('index', '-k', '31', 'zzz', *files) + runtmp.sourmash("index", "-k", "31", "zzz", *files) - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('sbt_combine', 'joined', 'zzz.sbt.zip', 'zzz.sbt.zip') + runtmp.sourmash("sbt_combine", "joined", "zzz.sbt.zip", "zzz.sbt.zip") - assert os.path.exists(runtmp.output('joined.sbt.zip')) + assert os.path.exists(runtmp.output("joined.sbt.zip")) filename = os.path.splitext(os.path.basename(utils.SIG_FILES[0]))[0] - runtmp.sourmash('search', files[0], 'zzz') + runtmp.sourmash("search", files[0], "zzz") print(runtmp.last_result.out) # we get notification of signature loading, too - so notify + result. assert runtmp.last_result.out.count(filename) == 1 - runtmp.sourmash('search', files[0], 'joined') + runtmp.sourmash("search", files[0], "joined") print(runtmp.last_result.out) @@ -2924,130 +3360,148 @@ def test_do_sourmash_sbt_combine(runtmp): def test_do_sourmash_index_append(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - testdata3 = utils.get_test_data('short3.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + testdata3 = utils.get_test_data("short3.fa") - runtmp.sourmash('sketch','dna', '-p', 'k=31,num=500', testdata1, testdata2, testdata3) + runtmp.sourmash( + "sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2, testdata3 + ) - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "-k", "31", "zzz", "short.fa.sig", "short2.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - sbt_name = runtmp.output('zzz',) - sig_loc = runtmp.output('short3.fa.sig') + sbt_name = runtmp.output( + "zzz", + ) + sig_loc = runtmp.output("short3.fa.sig") - runtmp.sourmash('search', sig_loc, sbt_name) + runtmp.sourmash("search", sig_loc, sbt_name) print(runtmp.last_result.out) - assert 'short.fa' in runtmp.last_result.out - assert 'short2.fa' in runtmp.last_result.out - assert 'short3.fa' not in runtmp.last_result.out + assert "short.fa" in runtmp.last_result.out + assert "short2.fa" in runtmp.last_result.out + assert "short3.fa" not in runtmp.last_result.out - runtmp.sourmash('index', '-k', '31', '--append', 'zzz', 'short3.fa.sig') + runtmp.sourmash("index", "-k", "31", "--append", "zzz", "short3.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - sbt_name = runtmp.output('zzz',) - sig_loc = runtmp.output('short3.fa.sig') + sbt_name = runtmp.output( + "zzz", + ) + sig_loc = runtmp.output("short3.fa.sig") - 
runtmp.sourmash('search', '--threshold', '0.95', sig_loc, sbt_name) + runtmp.sourmash("search", "--threshold", "0.95", sig_loc, sbt_name) print(runtmp.last_result.out) - assert 'short.fa' not in runtmp.last_result.out - assert 'short2.fa' in runtmp.last_result.out - assert 'short3.fa' in runtmp.last_result.out + assert "short.fa" not in runtmp.last_result.out + assert "short2.fa" in runtmp.last_result.out + assert "short3.fa" in runtmp.last_result.out def test_do_sourmash_sbt_search_otherdir(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna', '-p', 'k=31,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - runtmp.sourmash('index', '-k', '31', 'xxx/zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "-k", "31", "xxx/zzz", "short.fa.sig", "short2.fa.sig") - assert os.path.exists(runtmp.output('xxx/zzz.sbt.zip')) + assert os.path.exists(runtmp.output("xxx/zzz.sbt.zip")) - sbt_name = runtmp.output('xxx/zzz',) - sig_loc = runtmp.output('short.fa.sig') + sbt_name = runtmp.output( + "xxx/zzz", + ) + sig_loc = runtmp.output("short.fa.sig") - runtmp.sourmash('search', sig_loc, sbt_name) + runtmp.sourmash("search", sig_loc, sbt_name) print(runtmp.last_result.out) - assert 'short.fa' in runtmp.last_result.out - assert 'short2.fa' in runtmp.last_result.out + assert "short.fa" in runtmp.last_result.out + assert "short2.fa" in runtmp.last_result.out def test_do_sourmash_sbt_search_scaled_vs_num_1(runtmp): # should not work: scaled query against num tree - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna', '-p', 'k=31,num=500', testdata1) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1) - runtmp.sourmash('sketch','dna', '-p', 'scaled=1000', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=1000", testdata2) - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig') + runtmp.sourmash("index", "-k", "31", "zzz", "short.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - sbt_name = runtmp.output('zzz',) - sig_loc = runtmp.output('short2.fa.sig') + sbt_name = runtmp.output( + "zzz", + ) + sig_loc = runtmp.output("short2.fa.sig") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', sig_loc, sbt_name) + runtmp.sourmash("search", sig_loc, sbt_name) assert runtmp.last_result.status == -1 print(runtmp.last_result.out) print(runtmp.last_result.err) assert "ERROR: cannot use '" in runtmp.last_result.err - assert "this database was created with 'num' MinHash sketches, not 'scaled'" in runtmp.last_result.err + assert ( + "this database was created with 'num' MinHash sketches, not 'scaled'" + in runtmp.last_result.err + ) def test_do_sourmash_sbt_search_scaled_vs_num_2(runtmp): # should not work: num query against scaled tree - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna', '-p', 'k=31,num=500', testdata1) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1) - runtmp.sourmash('sketch','dna', '-p', 
'scaled=1000', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=1000", testdata2) - runtmp.sourmash('index', '-k', '31', 'zzz', 'short2.fa.sig') + runtmp.sourmash("index", "-k", "31", "zzz", "short2.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - sbt_name = runtmp.output('zzz',) - sig_loc = runtmp.output('short.fa.sig') + sbt_name = runtmp.output( + "zzz", + ) + sig_loc = runtmp.output("short.fa.sig") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', sig_loc, sbt_name) + runtmp.sourmash("search", sig_loc, sbt_name) assert runtmp.last_result.status == -1 print(runtmp.last_result.out) print(runtmp.last_result.err) assert "ERROR: cannot use '" in runtmp.last_result.err - assert "this database was created with 'scaled' MinHash sketches, not 'num'" in runtmp.last_result.err + assert ( + "this database was created with 'scaled' MinHash sketches, not 'num'" + in runtmp.last_result.err + ) def test_do_sourmash_sbt_search_scaled_vs_num_3(runtmp): # should not work: scaled query against num signature - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna', '-p', 'k=31,num=500', testdata1) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1) - runtmp.sourmash('sketch','dna', '-p', 'scaled=1000', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=1000", testdata2) - sig_loc = runtmp.output('short.fa.sig') - sig_loc2 = runtmp.output('short2.fa.sig') + sig_loc = runtmp.output("short.fa.sig") + sig_loc2 = runtmp.output("short2.fa.sig") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', sig_loc, sig_loc2) + runtmp.sourmash("search", sig_loc, sig_loc2) assert runtmp.last_result.status == -1 print(runtmp.last_result.out) @@ -3057,18 +3511,18 @@ def test_do_sourmash_sbt_search_scaled_vs_num_3(runtmp): def test_do_sourmash_sbt_search_scaled_vs_num_4(runtmp): # should not work: num query against scaled signature - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna', '-p', 'k=31,num=500', testdata1) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1) - runtmp.sourmash('sketch','dna', '-p', 'scaled=1000', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=1000", testdata2) - sig_loc = runtmp.output('short.fa.sig') - sig_loc2 = runtmp.output('short2.fa.sig') + sig_loc = runtmp.output("short.fa.sig") + sig_loc2 = runtmp.output("short2.fa.sig") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', sig_loc2, sig_loc) + runtmp.sourmash("search", sig_loc2, sig_loc) assert runtmp.last_result.status == -1 print(runtmp.last_result.out) @@ -3079,13 +3533,13 @@ def test_do_sourmash_sbt_search_scaled_vs_num_4(runtmp): def test_do_sourmash_check_search_vs_actual_similarity(runtmp): files = [utils.get_test_data(f) for f in utils.SIG_FILES] - runtmp.sourmash('index', '-k', '31', 'zzz', *files) + runtmp.sourmash("index", "-k", "31", "zzz", *files) - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - filename = os.path.splitext(os.path.basename(utils.SIG_FILES[0]))[0] + os.path.splitext(os.path.basename(utils.SIG_FILES[0]))[0] - runtmp.sourmash('search', files[0], 
'zzz') + runtmp.sourmash("search", files[0], "zzz") assert runtmp.last_result.status == 0 @@ -3093,9 +3547,9 @@ def test_do_sourmash_check_search_vs_actual_similarity(runtmp): def test_do_sourmash_check_sbt_filenames(runtmp): files = [utils.get_test_data(f) for f in utils.SIG_FILES] - runtmp.sourmash('index', '-k', '31', 'zzz.sbt.json', *files) + runtmp.sourmash("index", "-k", "31", "zzz.sbt.json", *files) - assert os.path.exists(runtmp.output('zzz.sbt.json')) + assert os.path.exists(runtmp.output("zzz.sbt.json")) sig_names = set() sig_md5s = set() @@ -3104,11 +3558,11 @@ def test_do_sourmash_check_sbt_filenames(runtmp): sig_names.add(sig.name) sig_md5s.add(sig.md5sum()) - sbt_files = glob.glob(runtmp.output('.sbt.zzz/*')) + sbt_files = glob.glob(runtmp.output(".sbt.zzz/*")) assert len(sbt_files) == 14 for f in sbt_files: - if 'internal' in f or f.endswith('zzz.manifest.csv'): + if "internal" in f or f.endswith("zzz.manifest.csv"): continue f = os.path.basename(f) assert f not in sig_names @@ -3116,161 +3570,208 @@ def test_do_sourmash_check_sbt_filenames(runtmp): def test_do_sourmash_sbt_search_bestonly(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna', '-p', 'k=31,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "-k", "31", "zzz", "short.fa.sig", "short2.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('search', '--best-only', 'short.fa.sig', 'zzz') + runtmp.sourmash("search", "--best-only", "short.fa.sig", "zzz") print(runtmp.last_result.out) - assert 'short.fa' in runtmp.last_result.out + assert "short.fa" in runtmp.last_result.out def test_do_sourmash_sbt_search_bestonly_scaled(runtmp): # as currently implemented, the query signature will be automatically # downsampled to match the tree. 
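# Editor's note: a short sketch (not part of the patch) of the downsampling
# behavior described in the comment above, using sourmash's public Python
# API. A finer-scaled query (e.g. scaled=1) can be searched against a
# coarser-scaled tree (e.g. scaled=10) because the query is downsampled
# first; the DNA string below is an arbitrary illustration.
import sourmash

mh_query = sourmash.MinHash(n=0, ksize=31, scaled=1)
mh_query.add_sequence("ATGGCATTAACGATGCATGCAGTCAGTACGT" * 10)

# downsample() keeps only the hashes that would also have been selected at
# the coarser scaled value; comparisons then happen at scaled=10.
mh_down = mh_query.downsample(scaled=10)
assert mh_down.scaled == 10
assert set(mh_down.hashes).issubset(set(mh_query.hashes))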
- testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna', '-p', 'scaled=1', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=1", testdata1, testdata2) - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig', 'short2.fa.sig', '--scaled', '10') + runtmp.sourmash( + "index", "-k", "31", "zzz", "short.fa.sig", "short2.fa.sig", "--scaled", "10" + ) - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('search', '--best-only', 'short.fa.sig', 'zzz') + runtmp.sourmash("search", "--best-only", "short.fa.sig", "zzz") print(runtmp.last_result.out) - assert 'short.fa' in runtmp.last_result.out + assert "short.fa" in runtmp.last_result.out def test_sbt_search_order_dependence(runtmp): - testdata1 = utils.get_test_data('genome-s10.fa.gz') - testdata2 = utils.get_test_data('genome-s11.fa.gz') - testdata3 = utils.get_test_data('genome-s12.fa.gz') - testdata4 = utils.get_test_data('genome-s10+s11.fa.gz') - - runtmp.sourmash('sketch', 'dna', '-p', 'k=21,scaled=10000', '-p', 'k=31,scaled=10000', testdata1, testdata2, testdata3, testdata4) - - runtmp.sourmash('index', '-k', '21', '134', 'genome-s10+s11.fa.gz.sig', 'genome-s11.fa.gz.sig', 'genome-s12.fa.gz.sig') - - runtmp.sourmash('search', '-k', '21', 'genome-s11.fa.gz.sig', '134', '--best-only', '-k', '21', '--dna') + testdata1 = utils.get_test_data("genome-s10.fa.gz") + testdata2 = utils.get_test_data("genome-s11.fa.gz") + testdata3 = utils.get_test_data("genome-s12.fa.gz") + testdata4 = utils.get_test_data("genome-s10+s11.fa.gz") + + runtmp.sourmash( + "sketch", + "dna", + "-p", + "k=21,scaled=10000", + "-p", + "k=31,scaled=10000", + testdata1, + testdata2, + testdata3, + testdata4, + ) + + runtmp.sourmash( + "index", + "-k", + "21", + "134", + "genome-s10+s11.fa.gz.sig", + "genome-s11.fa.gz.sig", + "genome-s12.fa.gz.sig", + ) + + runtmp.sourmash( + "search", + "-k", + "21", + "genome-s11.fa.gz.sig", + "134", + "--best-only", + "-k", + "21", + "--dna", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert '100.0%' in runtmp.last_result.out + assert "100.0%" in runtmp.last_result.out def test_sbt_search_order_dependence_2(runtmp): # *should* return the same result as test_sbt_search_order_dependence, # but does not due to a bug. 
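# Editor's note: an illustrative sketch (not sourmash's actual
# implementation) of why a '--best-only' search over a tree can be
# order-dependent, as the comment above notes: the running best score is
# used to prune subtrees via an upper-bound estimate, so the order in which
# signatures were inserted (and hence visited) can change which branches
# are pruned when the bound is loose.
def best_only_search(leaves, similarity, upper_bound):
    """Return the best-scoring leaf and its score.

    `similarity` and `upper_bound` are caller-supplied callables, with
    upper_bound(leaf) >= similarity(leaf) assumed for correct pruning.
    """
    best, best_score = None, 0.0
    for leaf in leaves:  # traversal order matters here...
        if upper_bound(leaf) < best_score:
            continue  # ...because pruning compares against the best so far.
        score = similarity(leaf)
        if score > best_score:
            best, best_score = leaf, score
    return best, best_score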
- testdata1 = utils.get_test_data('genome-s10.fa.gz') - testdata2 = utils.get_test_data('genome-s11.fa.gz') - testdata3 = utils.get_test_data('genome-s12.fa.gz') - testdata4 = utils.get_test_data('genome-s10+s11.fa.gz') - - runtmp.sourmash('sketch', 'dna', '-p', 'k=21,scaled=10000', '-p', 'k=31,scaled=10000', testdata1, testdata2, testdata3, testdata4) - - runtmp.sourmash('index', '-k', '21', '314', 'genome-s11.fa.gz.sig', 'genome-s10+s11.fa.gz.sig', 'genome-s12.fa.gz.sig') - - runtmp.sourmash('search', '-k', '21', 'genome-s11.fa.gz.sig', '314', '--best-only', '--dna') + testdata1 = utils.get_test_data("genome-s10.fa.gz") + testdata2 = utils.get_test_data("genome-s11.fa.gz") + testdata3 = utils.get_test_data("genome-s12.fa.gz") + testdata4 = utils.get_test_data("genome-s10+s11.fa.gz") + + runtmp.sourmash( + "sketch", + "dna", + "-p", + "k=21,scaled=10000", + "-p", + "k=31,scaled=10000", + testdata1, + testdata2, + testdata3, + testdata4, + ) + + runtmp.sourmash( + "index", + "-k", + "21", + "314", + "genome-s11.fa.gz.sig", + "genome-s10+s11.fa.gz.sig", + "genome-s12.fa.gz.sig", + ) + + runtmp.sourmash( + "search", "-k", "21", "genome-s11.fa.gz.sig", "314", "--best-only", "--dna" + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert '100.0%' in runtmp.last_result.out + assert "100.0%" in runtmp.last_result.out def test_compare_with_abundance_1(runtmp): # create two signatures - E1 = MinHash(ksize=5, n=5, is_protein=False, - track_abundance=True) - E2 = MinHash(ksize=5, n=5, is_protein=False, - track_abundance=True) + E1 = MinHash(ksize=5, n=5, is_protein=False, track_abundance=True) + E2 = MinHash(ksize=5, n=5, is_protein=False, track_abundance=True) - E1.add_sequence('ATGGA') - E2.add_sequence('ATGGA') + E1.add_sequence("ATGGA") + E2.add_sequence("ATGGA") - s1 = signature.SourmashSignature(E1, filename='e1', name='e1') - s2 = signature.SourmashSignature(E2, filename='e2', name='e2') + s1 = signature.SourmashSignature(E1, filename="e1", name="e1") + s2 = signature.SourmashSignature(E2, filename="e2", name="e2") - with open(runtmp.output('e1.sig'), 'w') as f: + with open(runtmp.output("e1.sig"), "w") as f: signature.save_signatures([s1], f) - with open(runtmp.output('e2.sig'), 'w') as f: + with open(runtmp.output("e2.sig"), "w") as f: signature.save_signatures([s2], f) - runtmp.sourmash('search', 'e1.sig', 'e2.sig', '-k', '5') + runtmp.sourmash("search", "e1.sig", "e2.sig", "-k", "5") - assert '100.0%' in runtmp.last_result.out + assert "100.0%" in runtmp.last_result.out def test_compare_with_abundance_2(runtmp): # create two signatures - E1 = MinHash(ksize=5, n=5, is_protein=False, - track_abundance=True) - E2 = MinHash(ksize=5, n=5, is_protein=False, - track_abundance=True) + E1 = MinHash(ksize=5, n=5, is_protein=False, track_abundance=True) + E2 = MinHash(ksize=5, n=5, is_protein=False, track_abundance=True) - E1.add_sequence('ATGGA') + E1.add_sequence("ATGGA") - E1.add_sequence('ATGGA') - E2.add_sequence('ATGGA') + E1.add_sequence("ATGGA") + E2.add_sequence("ATGGA") - s1 = signature.SourmashSignature(E1, filename='e1', name='e1') - s2 = signature.SourmashSignature(E2, filename='e2', name='e2') + s1 = signature.SourmashSignature(E1, filename="e1", name="e1") + s2 = signature.SourmashSignature(E2, filename="e2", name="e2") - with open(runtmp.output('e1.sig'), 'w') as f: + with open(runtmp.output("e1.sig"), "w") as f: signature.save_signatures([s1], f) - with open(runtmp.output('e2.sig'), 'w') as f: + with open(runtmp.output("e2.sig"), "w") as f: 
signature.save_signatures([s2], f) - runtmp.sourmash('search', 'e1.sig', 'e2.sig', '-k', '5') + runtmp.sourmash("search", "e1.sig", "e2.sig", "-k", "5") - assert '100.0%' in runtmp.last_result.out + assert "100.0%" in runtmp.last_result.out def test_compare_with_abundance_3(runtmp): # create two signatures - E1 = MinHash(ksize=5, n=5, is_protein=False, - track_abundance=True) - E2 = MinHash(ksize=5, n=5, is_protein=False, - track_abundance=True) + E1 = MinHash(ksize=5, n=5, is_protein=False, track_abundance=True) + E2 = MinHash(ksize=5, n=5, is_protein=False, track_abundance=True) - E1.add_sequence('ATGGA') - E1.add_sequence('GGACA') + E1.add_sequence("ATGGA") + E1.add_sequence("GGACA") - E1.add_sequence('ATGGA') - E2.add_sequence('ATGGA') + E1.add_sequence("ATGGA") + E2.add_sequence("ATGGA") - s1 = signature.SourmashSignature(E1, filename='e1', name='e1') - s2 = signature.SourmashSignature(E2, filename='e2', name='e2') + s1 = signature.SourmashSignature(E1, filename="e1", name="e1") + s2 = signature.SourmashSignature(E2, filename="e2", name="e2") - with open(runtmp.output('e1.sig'), 'w') as f: + with open(runtmp.output("e1.sig"), "w") as f: signature.save_signatures([s1], f) - with open(runtmp.output('e2.sig'), 'w') as f: + with open(runtmp.output("e2.sig"), "w") as f: signature.save_signatures([s2], f) - runtmp.sourmash('search', 'e1.sig', 'e2.sig', '-k', '5') + runtmp.sourmash("search", "e1.sig", "e2.sig", "-k", "5") - assert '70.5%' in runtmp.last_result.out + assert "70.5%" in runtmp.last_result.out def test_compare_with_picklist(runtmp): # test 'sourmash compare' with picklists - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") - runtmp.sourmash('compare', *gcf_sigs, - '-k', '21', '--picklist', f"{picklist}:md5:md5") + runtmp.sourmash( + "compare", *gcf_sigs, "-k", "21", "--picklist", f"{picklist}:md5:md5" + ) err = runtmp.last_result.err out = runtmp.last_result.out @@ -3287,11 +3788,12 @@ def test_compare_with_picklist(runtmp): def test_compare_with_picklist_exclude(runtmp): # test 'sourmash compare' with picklists - exclude - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") - runtmp.sourmash('compare', *gcf_sigs, - '-k', '21', '--picklist', f"{picklist}:md5:md5:exclude") + runtmp.sourmash( + "compare", *gcf_sigs, "-k", "21", "--picklist", f"{picklist}:md5:md5:exclude" + ) err = runtmp.last_result.err out = runtmp.last_result.out @@ -3309,12 +3811,10 @@ def test_compare_with_picklist_exclude(runtmp): def test_compare_with_pattern_include(runtmp): # test 'sourmash compare' with --include-db-pattern - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) - runtmp.sourmash('compare', *gcf_sigs, - '-k', '21', '--include', "thermotoga") + runtmp.sourmash("compare", *gcf_sigs, "-k", "21", "--include", "thermotoga") - err = runtmp.last_result.err out = runtmp.last_result.out print(runtmp.last_result.out) print(runtmp.last_result.err) @@ -3326,12 +3826,10 @@ def test_compare_with_pattern_include(runtmp): def test_compare_with_pattern_exclude(runtmp): # test 'sourmash compare' with picklists - exclude - gcf_sigs 
= glob.glob(utils.get_test_data('gather/GCF*.sig')) + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) - runtmp.sourmash('compare', *gcf_sigs, - '-k', '21', '--exclude', "thermotoga") + runtmp.sourmash("compare", *gcf_sigs, "-k", "21", "--exclude", "thermotoga") - err = runtmp.last_result.err out = runtmp.last_result.out print(runtmp.last_result.out) print(runtmp.last_result.err) @@ -3344,310 +3842,443 @@ def test_compare_with_pattern_exclude(runtmp): def test_gather(runtmp, linear_gather, prefetch_gather): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'scaled=10', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", testdata1, testdata2) - runtmp.sourmash('sketch','dna','-p','scaled=10', '-o', 'query.fa.sig', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", "-o", "query.fa.sig", testdata2) - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "-k", "31", "zzz", "short.fa.sig", "short2.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('gather', 'query.fa.sig', 'zzz', '-o', 'foo.csv', '--threshold-bp=1', linear_gather, prefetch_gather) + runtmp.sourmash( + "gather", + "query.fa.sig", + "zzz", + "-o", + "foo.csv", + "--threshold-bp=1", + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert '0.9 kbp 100.0% 100.0%' in runtmp.last_result.out + assert "0.9 kbp 100.0% 100.0%" in runtmp.last_result.out def test_gather_csv(runtmp, linear_gather, prefetch_gather): # test 'gather -o csvfile' - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - - runtmp.sourmash('sketch','dna','-p','scaled=10', '--name-from-first', testdata1, testdata2) - - runtmp.sourmash('sketch','dna','-p','scaled=10', '-o', 'query.fa.sig', '--name-from-first', testdata2) - - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig', 'short2.fa.sig') - - assert os.path.exists(runtmp.output('zzz.sbt.zip')) - - runtmp.sourmash('gather', 'query.fa.sig', 'zzz', '-o', 'foo.csv', '--threshold-bp=1', linear_gather, prefetch_gather) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + + runtmp.sourmash( + "sketch", "dna", "-p", "scaled=10", "--name-from-first", testdata1, testdata2 + ) + + runtmp.sourmash( + "sketch", + "dna", + "-p", + "scaled=10", + "-o", + "query.fa.sig", + "--name-from-first", + testdata2, + ) + + runtmp.sourmash("index", "-k", "31", "zzz", "short.fa.sig", "short2.fa.sig") + + assert os.path.exists(runtmp.output("zzz.sbt.zip")) + + runtmp.sourmash( + "gather", + "query.fa.sig", + "zzz", + "-o", + "foo.csv", + "--threshold-bp=1", + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - csv_file = runtmp.output('foo.csv') + csv_file = runtmp.output("foo.csv") with open(csv_file) as fp: reader = csv.DictReader(fp) row = next(reader) print(row) - assert float(row['intersect_bp']) == 910 - assert float(row['unique_intersect_bp']) == 910 - assert float(row['remaining_bp']) == 0 - assert float(row['f_orig_query']) == 1.0 - assert float(row['f_unique_to_query']) == 1.0 - assert float(row['f_match']) == 1.0 - assert row['filename'] == 'zzz' - assert row['name'] == 'tr1 4' - assert 
row['md5'] == 'c9d5a795eeaaf58e286fb299133e1938' - assert row['gather_result_rank'] == '0' - assert row['query_filename'].endswith('short2.fa') - assert row['query_name'] == 'tr1 4' - assert row['query_md5'] == 'c9d5a795' - assert row['query_bp'] == '910' - - assert row['query_abundance'] == 'False' - assert row['n_unique_weighted_found'] == '' + assert float(row["intersect_bp"]) == 910 + assert float(row["unique_intersect_bp"]) == 910 + assert float(row["remaining_bp"]) == 0 + assert float(row["f_orig_query"]) == 1.0 + assert float(row["f_unique_to_query"]) == 1.0 + assert float(row["f_match"]) == 1.0 + assert row["filename"] == "zzz" + assert row["name"] == "tr1 4" + assert row["md5"] == "c9d5a795eeaaf58e286fb299133e1938" + assert row["gather_result_rank"] == "0" + assert row["query_filename"].endswith("short2.fa") + assert row["query_name"] == "tr1 4" + assert row["query_md5"] == "c9d5a795" + assert row["query_bp"] == "910" + + assert row["query_abundance"] == "False" + assert row["n_unique_weighted_found"] == "" def test_gather_csv_gz(runtmp, linear_gather, prefetch_gather): # test 'gather -o csvfile.gz' - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - - runtmp.sourmash('sketch','dna','-p','scaled=10', '--name-from-first', testdata1, testdata2) - - runtmp.sourmash('sketch','dna','-p','scaled=10', '-o', 'query.fa.sig', '--name-from-first', testdata2) - - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig', 'short2.fa.sig') - - assert os.path.exists(runtmp.output('zzz.sbt.zip')) - - runtmp.sourmash('gather', 'query.fa.sig', 'zzz', '-o', 'foo.csv.gz', '--threshold-bp=1', linear_gather, prefetch_gather) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + + runtmp.sourmash( + "sketch", "dna", "-p", "scaled=10", "--name-from-first", testdata1, testdata2 + ) + + runtmp.sourmash( + "sketch", + "dna", + "-p", + "scaled=10", + "-o", + "query.fa.sig", + "--name-from-first", + testdata2, + ) + + runtmp.sourmash("index", "-k", "31", "zzz", "short.fa.sig", "short2.fa.sig") + + assert os.path.exists(runtmp.output("zzz.sbt.zip")) + + runtmp.sourmash( + "gather", + "query.fa.sig", + "zzz", + "-o", + "foo.csv.gz", + "--threshold-bp=1", + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - csv_file = runtmp.output('foo.csv.gz') + csv_file = runtmp.output("foo.csv.gz") with gzip.open(csv_file, "rt", newline="") as fp: reader = csv.DictReader(fp) row = next(reader) print(row) - assert float(row['intersect_bp']) == 910 - assert float(row['unique_intersect_bp']) == 910 - assert float(row['remaining_bp']) == 0 - assert float(row['f_orig_query']) == 1.0 - assert float(row['f_unique_to_query']) == 1.0 - assert float(row['f_match']) == 1.0 - assert row['filename'] == 'zzz' - assert row['name'] == 'tr1 4' - assert row['md5'] == 'c9d5a795eeaaf58e286fb299133e1938' - assert row['gather_result_rank'] == '0' - assert row['query_filename'].endswith('short2.fa') - assert row['query_name'] == 'tr1 4' - assert row['query_md5'] == 'c9d5a795' - assert row['query_bp'] == '910' + assert float(row["intersect_bp"]) == 910 + assert float(row["unique_intersect_bp"]) == 910 + assert float(row["remaining_bp"]) == 0 + assert float(row["f_orig_query"]) == 1.0 + assert float(row["f_unique_to_query"]) == 1.0 + assert float(row["f_match"]) == 1.0 + assert row["filename"] == "zzz" + assert row["name"] == "tr1 4" + assert row["md5"] == "c9d5a795eeaaf58e286fb299133e1938" + assert 
row["gather_result_rank"] == "0" + assert row["query_filename"].endswith("short2.fa") + assert row["query_name"] == "tr1 4" + assert row["query_md5"] == "c9d5a795" + assert row["query_bp"] == "910" def test_gather_abund_x_abund(runtmp, prefetch_gather, linear_gather): - sig47 = utils.get_test_data('track_abund/47.fa.sig') - sig63 = utils.get_test_data('track_abund/63.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") + sig63 = utils.get_test_data("track_abund/63.fa.sig") - runtmp.sourmash('gather', sig47, sig63, linear_gather, prefetch_gather) + runtmp.sourmash("gather", sig47, sig63, linear_gather, prefetch_gather) - assert '2.5 Mbp 49.2% 48.3% 1.0 NC_011663.1' in runtmp.last_result.out + assert ( + "2.5 Mbp 49.2% 48.3% 1.0 NC_011663.1" in runtmp.last_result.out + ) def test_gather_multiple_sbts(runtmp, prefetch_gather, linear_gather): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna', '-p', 'scaled=10', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", testdata1, testdata2) - runtmp.sourmash('sketch','dna','-p','scaled=10', '-o', 'query.fa.sig', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", "-o", "query.fa.sig", testdata2) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz", "short.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('index', 'zzz2', 'short2.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz2", "short2.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('gather', 'query.fa.sig', 'zzz', 'zzz2', '-o', 'foo.csv', '--threshold-bp=1', linear_gather, prefetch_gather) + runtmp.sourmash( + "gather", + "query.fa.sig", + "zzz", + "zzz2", + "-o", + "foo.csv", + "--threshold-bp=1", + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert '0.9 kbp 100.0% 100.0%' in runtmp.last_result.out + assert "0.9 kbp 100.0% 100.0%" in runtmp.last_result.out def test_gather_multiple_sbts_save_prefetch(runtmp, linear_gather): # test --save-prefetch with multiple databases - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna', '-p', 'scaled=10', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", testdata1, testdata2) - runtmp.sourmash('sketch','dna','-p','scaled=10', '-o', 'query.fa.sig', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", "-o", "query.fa.sig", testdata2) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz", "short.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('index', 'zzz2', 'short2.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz2", "short2.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('gather', 'query.fa.sig', 'zzz', 'zzz2', '-o', 'foo.csv', '--save-prefetch', 'out.zip', '--threshold-bp=1', linear_gather) + runtmp.sourmash( + 
"gather", + "query.fa.sig", + "zzz", + "zzz2", + "-o", + "foo.csv", + "--save-prefetch", + "out.zip", + "--threshold-bp=1", + linear_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert '0.9 kbp 100.0% 100.0%' in runtmp.last_result.out - assert os.path.exists(runtmp.output('out.zip')) + assert "0.9 kbp 100.0% 100.0%" in runtmp.last_result.out + assert os.path.exists(runtmp.output("out.zip")) def test_gather_multiple_sbts_save_prefetch_csv(runtmp, linear_gather): # test --save-prefetch-csv with multiple databases - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna', '-p', 'scaled=10', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", testdata1, testdata2) - runtmp.sourmash('sketch','dna','-p','scaled=10', '-o', 'query.fa.sig', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", "-o", "query.fa.sig", testdata2) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz", "short.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('index', 'zzz2', 'short2.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz2", "short2.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('gather', 'query.fa.sig', 'zzz', 'zzz2', '-o', 'foo.csv', '--save-prefetch-csv', 'prefetch.csv', '--threshold-bp=1', linear_gather) + runtmp.sourmash( + "gather", + "query.fa.sig", + "zzz", + "zzz2", + "-o", + "foo.csv", + "--save-prefetch-csv", + "prefetch.csv", + "--threshold-bp=1", + linear_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert '0.9 kbp 100.0% 100.0%' in runtmp.last_result.out - assert os.path.exists(runtmp.output('prefetch.csv')) - with open(runtmp.output('prefetch.csv')) as f: + assert "0.9 kbp 100.0% 100.0%" in runtmp.last_result.out + assert os.path.exists(runtmp.output("prefetch.csv")) + with open(runtmp.output("prefetch.csv")) as f: output = f.read() print((output,)) - assert '870,0.925531914893617,0.9666666666666667' in output + assert "870,0.925531914893617,0.9666666666666667" in output def test_gather_multiple_sbts_save_prefetch_csv_gz(runtmp, linear_gather): # test --save-prefetch-csv to a .gz file, with multiple databases - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna', '-p', 'scaled=10', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", testdata1, testdata2) - runtmp.sourmash('sketch','dna','-p','scaled=10', '-o', 'query.fa.sig', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", "-o", "query.fa.sig", testdata2) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz", "short.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('index', 'zzz2', 'short2.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz2", "short2.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('gather', 'query.fa.sig', 
'zzz', 'zzz2', '-o', 'foo.csv', '--save-prefetch-csv', 'prefetch.csv.gz', '--threshold-bp=1', linear_gather) + runtmp.sourmash( + "gather", + "query.fa.sig", + "zzz", + "zzz2", + "-o", + "foo.csv", + "--save-prefetch-csv", + "prefetch.csv.gz", + "--threshold-bp=1", + linear_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert '0.9 kbp 100.0% 100.0%' in runtmp.last_result.out - assert os.path.exists(runtmp.output('prefetch.csv.gz')) - with gzip.open(runtmp.output('prefetch.csv.gz'), 'rt', newline="") as f: + assert "0.9 kbp 100.0% 100.0%" in runtmp.last_result.out + assert os.path.exists(runtmp.output("prefetch.csv.gz")) + with gzip.open(runtmp.output("prefetch.csv.gz"), "rt", newline="") as f: output = f.read() print((output,)) - assert '870,0.925531914893617,0.9666666666666667' in output + assert "870,0.925531914893617,0.9666666666666667" in output def test_gather_multiple_sbts_save_prefetch_and_prefetch_csv(runtmp, linear_gather): # test --save-prefetch-csv with multiple databases - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch','dna', '-p', 'scaled=10', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", testdata1, testdata2) - runtmp.sourmash('sketch','dna','-p','scaled=10', '-o', 'query.fa.sig', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", "-o", "query.fa.sig", testdata2) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz", "short.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('index', 'zzz2', 'short2.fa.sig', '-k', '31') + runtmp.sourmash("index", "zzz2", "short2.fa.sig", "-k", "31") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('gather', 'query.fa.sig', 'zzz', 'zzz2', '-o', 'foo.csv', '--save-prefetch', 'out.zip', '--save-prefetch-csv', 'prefetch.csv', '--threshold-bp=1', linear_gather) + runtmp.sourmash( + "gather", + "query.fa.sig", + "zzz", + "zzz2", + "-o", + "foo.csv", + "--save-prefetch", + "out.zip", + "--save-prefetch-csv", + "prefetch.csv", + "--threshold-bp=1", + linear_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert '0.9 kbp 100.0% 100.0%' in runtmp.last_result.out - assert os.path.exists(runtmp.output('prefetch.csv')) - with open(runtmp.output('prefetch.csv')) as f: + assert "0.9 kbp 100.0% 100.0%" in runtmp.last_result.out + assert os.path.exists(runtmp.output("prefetch.csv")) + with open(runtmp.output("prefetch.csv")) as f: output = f.read() print((output,)) - assert '870,0.925531914893617,0.9666666666666667' in output - assert os.path.exists(runtmp.output('out.zip')) + assert "870,0.925531914893617,0.9666666666666667" in output + assert os.path.exists(runtmp.output("out.zip")) def test_gather_sbt_and_sigs(runtmp, linear_gather, prefetch_gather): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'k=31,scaled=10', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,scaled=10", testdata1, testdata2) - runtmp.sourmash('sketch','dna','-p','scaled=10', '-o', 'query.fa.sig', testdata2) + 
runtmp.sourmash("sketch", "dna", "-p", "scaled=10", "-o", "query.fa.sig", testdata2) - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig') + runtmp.sourmash("index", "-k", "31", "zzz", "short.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('gather', 'query.fa.sig', 'zzz', 'short2.fa.sig', '-o', 'foo.csv', linear_gather, prefetch_gather, '--threshold-bp=1') + runtmp.sourmash( + "gather", + "query.fa.sig", + "zzz", + "short2.fa.sig", + "-o", + "foo.csv", + linear_gather, + prefetch_gather, + "--threshold-bp=1", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert '0.9 kbp 100.0% 100.0%' in runtmp.last_result.out + assert "0.9 kbp 100.0% 100.0%" in runtmp.last_result.out def test_gather_file_output(runtmp, linear_gather, prefetch_gather): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'scaled=10', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", testdata1, testdata2) - runtmp.sourmash('sketch','dna','-p','scaled=10', '-o', 'query.fa.sig', testdata2) + runtmp.sourmash("sketch", "dna", "-p", "scaled=10", "-o", "query.fa.sig", testdata2) - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "-k", "31", "zzz", "short.fa.sig", "short2.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('gather', 'query.fa.sig', 'zzz', '--threshold-bp=500', linear_gather, prefetch_gather, '-o', 'foo.out') + runtmp.sourmash( + "gather", + "query.fa.sig", + "zzz", + "--threshold-bp=500", + linear_gather, + prefetch_gather, + "-o", + "foo.out", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert '0.9 kbp 100.0% 100.0%' in runtmp.last_result.out - with open(runtmp.output('foo.out')) as f: + assert "0.9 kbp 100.0% 100.0%" in runtmp.last_result.out + with open(runtmp.output("foo.out")) as f: output = f.read() print((output,)) - assert '910,1.0,1.0' in output + assert "910,1.0,1.0" in output def test_gather_f_match_orig(runtmp, linear_gather, prefetch_gather): import copy - testdata_combined = utils.get_test_data('gather/combined.sig') - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_combined = utils.get_test_data("gather/combined.sig") + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - runtmp.sourmash('gather', testdata_combined, '-o', 'out.csv', - *testdata_sigs, linear_gather, prefetch_gather) + runtmp.sourmash( + "gather", + testdata_combined, + "-o", + "out.csv", + *testdata_sigs, + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) @@ -3658,25 +4289,25 @@ def test_gather_f_match_orig(runtmp, linear_gather, prefetch_gather): def approx_equal(a, b, n=5): return round(a, n) == round(b, n) - with open(runtmp.output('out.csv'), 'rt') as fp: + with open(runtmp.output("out.csv")) as fp: r = csv.DictReader(fp) for n, row in enumerate(r): - print(n, row['f_match'], row['f_match_orig']) + print(n, row["f_match"], row["f_match_orig"]) # each match is completely in the original query - assert row['f_match_orig'] == "1.0" + assert row["f_match_orig"] == "1.0" # double check -- should match 'search --containment'. 
# (this is kind of useless for a 1.0 contained_by, I guess) - filename = row['filename'] + filename = row["filename"] match = sourmash.load_one_signature(filename, ksize=21) assert match.contained_by(combined_sig) == 1.0 # check other fields, too. - f_orig_query = float(row['f_orig_query']) - f_match_orig = float(row['f_match_orig']) - f_match = float(row['f_match']) - f_unique_to_query = float(row['f_unique_to_query']) + f_orig_query = float(row["f_orig_query"]) + f_match_orig = float(row["f_match_orig"]) + f_match = float(row["f_match"]) + f_unique_to_query = float(row["f_unique_to_query"]) # f_orig_query is the containment of the query by the match. # (note, this only works because containment is 100% in combined). @@ -3687,8 +4318,7 @@ def approx_equal(a, b, n=5): assert approx_equal(match.contained_by(combined_sig), f_match_orig) # f_match is how much of the match is in the unallocated hashes - assert approx_equal(match.minhash.contained_by(remaining_mh), - f_match) + assert approx_equal(match.minhash.contained_by(remaining_mh), f_match) # f_unique_to_query is how much of the match is unique wrt # the original query. @@ -3704,14 +4334,21 @@ def approx_equal(a, b, n=5): def test_gather_nomatch(runtmp, linear_gather, prefetch_gather): testdata_query = utils.get_test_data( - 'gather/GCF_000006945.2_ASM694v2_genomic.fna.gz.sig') - testdata_match = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - - out_csv = runtmp.output('results.csv') - - runtmp.sourmash('gather', testdata_query, testdata_match, - '-o', out_csv, - linear_gather, prefetch_gather) + "gather/GCF_000006945.2_ASM694v2_genomic.fna.gz.sig" + ) + testdata_match = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + + out_csv = runtmp.output("results.csv") + + runtmp.sourmash( + "gather", + testdata_query, + testdata_match, + "-o", + out_csv, + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) @@ -3722,14 +4359,22 @@ def test_gather_nomatch(runtmp, linear_gather, prefetch_gather): def test_gather_nomatch_create_empty(runtmp, linear_gather, prefetch_gather): testdata_query = utils.get_test_data( - 'gather/GCF_000006945.2_ASM694v2_genomic.fna.gz.sig') - testdata_match = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - - out_csv = runtmp.output('results.csv') - - runtmp.sourmash('gather', testdata_query, testdata_match, - '-o', out_csv, '--create-empty-results', - linear_gather, prefetch_gather) + "gather/GCF_000006945.2_ASM694v2_genomic.fna.gz.sig" + ) + testdata_match = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + + out_csv = runtmp.output("results.csv") + + runtmp.sourmash( + "gather", + testdata_query, + testdata_match, + "-o", + out_csv, + "--create-empty-results", + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) @@ -3737,17 +4382,20 @@ def test_gather_nomatch_create_empty(runtmp, linear_gather, prefetch_gather): assert "No matches found for --threshold-bp at 50.0 kbp." 
in runtmp.last_result.err assert os.path.exists(out_csv) - with open(out_csv, 'rt') as fp: + with open(out_csv) as fp: data = fp.read() assert not data def test_gather_abund_nomatch(runtmp, linear_gather, prefetch_gather): - testdata_query = utils.get_test_data('gather-abund/reads-s10x10-s11.sig') - testdata_match = utils.get_test_data('gather/GCF_000006945.2_ASM694v2_genomic.fna.gz.sig') + testdata_query = utils.get_test_data("gather-abund/reads-s10x10-s11.sig") + testdata_match = utils.get_test_data( + "gather/GCF_000006945.2_ASM694v2_genomic.fna.gz.sig" + ) - runtmp.sourmash('gather', testdata_query, testdata_match, - linear_gather, prefetch_gather) + runtmp.sourmash( + "gather", testdata_query, testdata_match, linear_gather, prefetch_gather + ) print(runtmp.last_result.out) print(runtmp.last_result.err) @@ -3756,50 +4404,58 @@ def test_gather_abund_nomatch(runtmp, linear_gather, prefetch_gather): def test_gather_metagenome(runtmp): - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) runtmp.sourmash(*cmd) - assert os.path.exists(runtmp.output('gcf_all.sbt.zip')) + assert os.path.exists(runtmp.output("gcf_all.sbt.zip")) - runtmp.sourmash('gather', query_sig, 'gcf_all', '-k', '21', '--threshold-bp=0') + runtmp.sourmash("gather", query_sig, "gcf_all", "-k", "21", "--threshold-bp=0") print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 12 matches total' in runtmp.last_result.out - assert 'the recovered matches hit 100.0% of the query' in runtmp.last_result.out - assert all(('4.9 Mbp 33.2% 100.0%' in runtmp.last_result.out, - 'NC_003198.1 Salmonella enterica subsp' in runtmp.last_result.out)) - assert all(('4.7 Mbp 0.5% 1.5%' in runtmp.last_result.out, - 'NC_011294.1 Salmonella enterica subs' in runtmp.last_result.out)) + assert "found 12 matches total" in runtmp.last_result.out + assert "the recovered matches hit 100.0% of the query" in runtmp.last_result.out + assert all( + ( + "4.9 Mbp 33.2% 100.0%" in runtmp.last_result.out, + "NC_003198.1 Salmonella enterica subsp" in runtmp.last_result.out, + ) + ) + assert all( + ( + "4.7 Mbp 0.5% 1.5%" in runtmp.last_result.out, + "NC_011294.1 Salmonella enterica subs" in runtmp.last_result.out, + ) + ) @utils.in_tempdir def test_gather_metagenome_num_results(c): # set a threshold on the number of results to be reported by gather - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) c.run_sourmash(*cmd) - assert os.path.exists(c.output('gcf_all.sbt.zip')) + assert os.path.exists(c.output("gcf_all.sbt.zip")) - cmd = 'gather {} gcf_all -k 21 --num-results 10'.format(query_sig) - cmd = cmd.split(' ') + cmd = f"gather {query_sig} gcf_all -k 21 --num-results 10" + cmd = cmd.split(" ") c.run_sourmash(*cmd) print(c.last_result.out) @@ -3807,85 +4463,122 @@ def test_gather_metagenome_num_results(c): out = c.last_result.out - assert 'found 10 
matches total' in out - assert '(truncated gather because --num-results=10)' in out - assert 'the recovered matches hit 99.4% of the query' in out - assert all(('4.9 Mbp 33.2% 100.0%' in out, - 'NC_003198.1 Salmonella enterica subsp' in out)) - assert '4.3 Mbp 2.1% 7.3% NC_006511.1 Salmonella enterica subsp' in out + assert "found 10 matches total" in out + assert "(truncated gather because --num-results=10)" in out + assert "the recovered matches hit 99.4% of the query" in out + assert all( + ( + "4.9 Mbp 33.2% 100.0%" in out, + "NC_003198.1 Salmonella enterica subsp" in out, + ) + ) + assert "4.3 Mbp 2.1% 7.3% NC_006511.1 Salmonella enterica subsp" in out def test_gather_metagenome_threshold_bp(runtmp, linear_gather, prefetch_gather): # set a threshold on the gather output - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) runtmp.sourmash(*cmd) - assert os.path.exists(runtmp.output('gcf_all.sbt.zip')) + assert os.path.exists(runtmp.output("gcf_all.sbt.zip")) - runtmp.sourmash('gather', query_sig, 'gcf_all', '-k', '21', - '--threshold-bp', '2e6', linear_gather, prefetch_gather) + runtmp.sourmash( + "gather", + query_sig, + "gcf_all", + "-k", + "21", + "--threshold-bp", + "2e6", + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 1 matches total' in runtmp.last_result.out - assert 'found less than 2.0 Mbp in common. => exiting' in runtmp.last_result.err - assert 'the recovered matches hit 33.2% of the query' in runtmp.last_result.out - assert all(('4.9 Mbp 33.2% 100.0%' in runtmp.last_result.out, - 'NC_003198.1 Salmonella enterica subsp' in runtmp.last_result.out)) + assert "found 1 matches total" in runtmp.last_result.out + assert "found less than 2.0 Mbp in common. => exiting" in runtmp.last_result.err + assert "the recovered matches hit 33.2% of the query" in runtmp.last_result.out + assert all( + ( + "4.9 Mbp 33.2% 100.0%" in runtmp.last_result.out, + "NC_003198.1 Salmonella enterica subsp" in runtmp.last_result.out, + ) + ) def test_gather_metagenome_threshold_bp_low(runtmp, linear_gather, prefetch_gather): # set a threshold on the gather output => too low - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) runtmp.sourmash(*cmd) - assert os.path.exists(runtmp.output('gcf_all.sbt.zip')) + assert os.path.exists(runtmp.output("gcf_all.sbt.zip")) - runtmp.sourmash('gather', query_sig, 'gcf_all', '-k', '21', - '--threshold-bp', '1', linear_gather, prefetch_gather) + runtmp.sourmash( + "gather", + query_sig, + "gcf_all", + "-k", + "21", + "--threshold-bp", + "1", + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 12 matches total' in runtmp.last_result.out - assert 'found less than 1 bp in common. 
=> exiting' in runtmp.last_result.err - assert 'the recovered matches hit 100.0% of the query' in runtmp.last_result.out + assert "found 12 matches total" in runtmp.last_result.out + assert "found less than 1 bp in common. => exiting" in runtmp.last_result.err + assert "the recovered matches hit 100.0% of the query" in runtmp.last_result.out -def test_gather_metagenome_threshold_bp_too_high(runtmp, linear_gather, prefetch_gather): +def test_gather_metagenome_threshold_bp_too_high( + runtmp, linear_gather, prefetch_gather +): # set a threshold on the gather output => no results - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) runtmp.sourmash(*cmd) - assert os.path.exists(runtmp.output('gcf_all.sbt.zip')) + assert os.path.exists(runtmp.output("gcf_all.sbt.zip")) - runtmp.sourmash('gather', query_sig, 'gcf_all', '-k', '21', - '--threshold-bp', '5e6', linear_gather, prefetch_gather) + runtmp.sourmash( + "gather", + query_sig, + "gcf_all", + "-k", + "21", + "--threshold-bp", + "5e6", + linear_gather, + prefetch_gather, + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -3896,46 +4589,67 @@ def test_gather_metagenome_threshold_bp_too_high(runtmp, linear_gather, prefetch def test_multigather_metagenome(runtmp): - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) runtmp.sourmash(*cmd) - assert os.path.exists(runtmp.output('gcf_all.sbt.zip')) + assert os.path.exists(runtmp.output("gcf_all.sbt.zip")) - runtmp.sourmash('multigather', '--query', query_sig, '--db', 'gcf_all', '-k', '21', '--threshold-bp=0') + runtmp.sourmash( + "multigather", + "--query", + query_sig, + "--db", + "gcf_all", + "-k", + "21", + "--threshold-bp=0", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 12 matches total' in runtmp.last_result.out - assert 'the recovered matches hit 100.0% of the query' in runtmp.last_result.out - assert all(('4.9 Mbp 33.2% 100.0%' in runtmp.last_result.out, - 'NC_003198.1 Salmonella enterica subsp' in runtmp.last_result.out)) - assert all(('4.7 Mbp 0.5% 1.5%' in runtmp.last_result.out, - 'NC_011294.1 Salmonella enterica subsp' in runtmp.last_result.out)) + assert "found 12 matches total" in runtmp.last_result.out + assert "the recovered matches hit 100.0% of the query" in runtmp.last_result.out + assert all( + ( + "4.9 Mbp 33.2% 100.0%" in runtmp.last_result.out, + "NC_003198.1 Salmonella enterica subsp" in runtmp.last_result.out, + ) + ) + assert all( + ( + "4.7 Mbp 0.5% 1.5%" in runtmp.last_result.out, + "NC_011294.1 Salmonella enterica subsp" in runtmp.last_result.out, + ) + ) def test_multigather_check_scaled_bounds_negative(runtmp): c = runtmp - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = 
utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) c.run_sourmash(*cmd) - cmd = 'multigather --query {} --db gcf_all -k 21 --scaled -5 --threshold-bp=0'.format(query_sig) - cmd = cmd.split(' ') + cmd = ( + "multigather --query {} --db gcf_all -k 21 --scaled -5 --threshold-bp=0".format( + query_sig + ) + ) + cmd = cmd.split(" ") with pytest.raises(SourmashCommandFailed) as exc: c.run_sourmash(*cmd) @@ -3944,67 +4658,80 @@ def test_multigather_check_scaled_bounds_negative(runtmp): def test_multigather_check_scaled_bounds_less_than_minimum(runtmp): c = runtmp - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) c.run_sourmash(*cmd) - cmd = 'multigather --query {} --db gcf_all -k 21 --scaled 50 --threshold-bp=0'.format(query_sig) - cmd = cmd.split(' ') + cmd = ( + "multigather --query {} --db gcf_all -k 21 --scaled 50 --threshold-bp=0".format( + query_sig + ) + ) + cmd = cmd.split(" ") # Note: this is the value error that is emitted, but we want the Warning from below to be generated instead. (ValueError: new scaled 50.0 is lower than current sample scaled 10000) with pytest.raises(SourmashCommandFailed) as exc: c.run_sourmash(*cmd) - assert "WARNING: scaled value should be >= 100. Continuing anyway." in str(exc.value) + assert "WARNING: scaled value should be >= 100. Continuing anyway." in str( + exc.value + ) def test_multigather_check_scaled_bounds_more_than_maximum(runtmp): c = runtmp - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) c.run_sourmash(*cmd) - cmd = 'multigather --query {} --db gcf_all -k 21 --scaled 1e9 --threshold-bp=0'.format(query_sig) - cmd = cmd.split(' ') + cmd = "multigather --query {} --db gcf_all -k 21 --scaled 1e9 --threshold-bp=0".format( + query_sig + ) + cmd = cmd.split(" ") c.run_sourmash(*cmd) - assert "WARNING: scaled value should be <= 1e6. Continuing anyway." in c.last_result.err + assert ( + "WARNING: scaled value should be <= 1e6. Continuing anyway." 
+ in c.last_result.err + ) def test_multigather_metagenome_query_from_file(runtmp): # test multigather --query-from-file c = runtmp - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) c.run_sourmash(*cmd) - assert os.path.exists(c.output('gcf_all.sbt.zip')) + assert os.path.exists(c.output("gcf_all.sbt.zip")) # make list w/query sig - query_list = c.output('query.list') - with open(query_list, 'wt') as fp: + query_list = c.output("query.list") + with open(query_list, "w") as fp: print(query_sig, file=fp) - cmd = 'multigather --query-from-file {} --db gcf_all -k 21 --threshold-bp=0'.format(query_list) - cmd = cmd.split(' ') + cmd = "multigather --query-from-file {} --db gcf_all -k 21 --threshold-bp=0".format( + query_list + ) + cmd = cmd.split(" ") c.run_sourmash(*cmd) out = c.last_result.out @@ -4012,36 +4739,44 @@ def test_multigather_metagenome_query_from_file(runtmp): err = c.last_result.err print(err) - assert 'found 12 matches total' in out - assert 'the recovered matches hit 100.0% of the query' in out - assert all(('4.9 Mbp 33.2% 100.0%' in out, - 'NC_003198.1 Salmonella enterica subsp' in out)) - assert all(('4.7 Mbp 0.5% 1.5%' in out, - 'NC_011294.1 Salmonella enterica subsp' in out)) + assert "found 12 matches total" in out + assert "the recovered matches hit 100.0% of the query" in out + assert all( + ( + "4.9 Mbp 33.2% 100.0%" in out, + "NC_003198.1 Salmonella enterica subsp" in out, + ) + ) + assert all( + ( + "4.7 Mbp 0.5% 1.5%" in out, + "NC_011294.1 Salmonella enterica subsp" in out, + ) + ) def test_multigather_metagenome_output(runtmp): # test multigather CSV output has more than one output line c = runtmp - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) c.run_sourmash(*cmd) - assert os.path.exists(c.output('gcf_all.sbt.zip')) + assert os.path.exists(c.output("gcf_all.sbt.zip")) - cmd = f'multigather --query {query_sig} --db gcf_all -k 21 --threshold-bp=0' - cmd = cmd.split(' ') + cmd = f"multigather --query {query_sig} --db gcf_all -k 21 --threshold-bp=0" + cmd = cmd.split(" ") c.run_sourmash(*cmd) - output_csv = runtmp.output('-.csv') + output_csv = runtmp.output("-.csv") assert os.path.exists(output_csv) - with open(output_csv, newline='') as fp: + with open(output_csv, newline="") as fp: x = fp.readlines() assert len(x) == 13 @@ -4049,50 +4784,49 @@ def test_multigather_metagenome_output(runtmp): def test_multigather_metagenome_output_outdir(runtmp): # test multigather CSV output to different location c = runtmp - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - 
cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) c.run_sourmash(*cmd) - assert os.path.exists(c.output('gcf_all.sbt.zip')) + assert os.path.exists(c.output("gcf_all.sbt.zip")) # create output directory - outdir = runtmp.output('savehere') + outdir = runtmp.output("savehere") os.mkdir(outdir) - cmd = f'multigather --query {query_sig} --db gcf_all -k 21 --threshold-bp=0 --output-dir {outdir}' - cmd = cmd.split(' ') + cmd = f"multigather --query {query_sig} --db gcf_all -k 21 --threshold-bp=0 --output-dir {outdir}" + cmd = cmd.split(" ") c.run_sourmash(*cmd) - output_csv = runtmp.output('savehere/-.csv') + output_csv = runtmp.output("savehere/-.csv") assert os.path.exists(output_csv) - with open(output_csv, newline='') as fp: + with open(output_csv, newline="") as fp: x = fp.readlines() assert len(x) == 13 @utils.in_tempdir def test_multigather_metagenome_query_with_sbt(c): - - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all.sbt.zip'] + cmd = ["index", "gcf_all.sbt.zip"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) c.run_sourmash(*cmd) - assert os.path.exists(c.output('gcf_all.sbt.zip')) + assert os.path.exists(c.output("gcf_all.sbt.zip")) - cmd = 'multigather --query gcf_all.sbt.zip --db gcf_all.sbt.zip -k 21 --threshold-bp=0' - cmd = cmd.split(' ') + cmd = "multigather --query gcf_all.sbt.zip --db gcf_all.sbt.zip -k 21 --threshold-bp=0" + cmd = cmd.split(" ") c.run_sourmash(*cmd) out = c.last_result.out @@ -4100,35 +4834,50 @@ def test_multigather_metagenome_query_with_sbt(c): err = c.last_result.err print(err) - assert 'conducted gather searches on 12 signatures' in err - assert 'the recovered matches hit 100.0% of the query' in out - assert all(('4.7 Mbp 100.0% 100.0%' in out, - 'NC_011080.1 Salmonella enterica subsp' in out)) - assert all(('4.5 Mbp 100.0% 100.0%' in out, - 'NC_004631.1 Salmonella enterica subsp' in out)) - assert all (('1.6 Mbp 100.0% 100.0%' in out, - 'NC_002163.1 Campylobacter jejuni subs' in out)) - assert all(('1.9 Mbp 100.0% 100.0%' in out, - 'NC_000853.1 Thermotoga maritima MSB8 ' in out)) + assert "conducted gather searches on 12 signatures" in err + assert "the recovered matches hit 100.0% of the query" in out + assert all( + ( + "4.7 Mbp 100.0% 100.0%" in out, + "NC_011080.1 Salmonella enterica subsp" in out, + ) + ) + assert all( + ( + "4.5 Mbp 100.0% 100.0%" in out, + "NC_004631.1 Salmonella enterica subsp" in out, + ) + ) + assert all( + ( + "1.6 Mbp 100.0% 100.0%" in out, + "NC_002163.1 Campylobacter jejuni subs" in out, + ) + ) + assert all( + ( + "1.9 Mbp 100.0% 100.0%" in out, + "NC_000853.1 Thermotoga maritima MSB8 " in out, + ) + ) @utils.in_tempdir def test_multigather_metagenome_query_with_lca(c): - - testdata_glob = utils.get_test_data('47*.fa.sig') + testdata_glob = utils.get_test_data("47*.fa.sig") testdata_sigs = glob.glob(testdata_glob) - lca_db = utils.get_test_data('lca/47+63.lca.json') + lca_db = utils.get_test_data("lca/47+63.lca.json") - cmd = ['index', '47+63.sbt.zip'] + cmd = ["index", "47+63.sbt.zip"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '31']) + cmd.extend(["-k", "31"]) c.run_sourmash(*cmd) - assert os.path.exists(c.output('47+63.sbt.zip')) + assert os.path.exists(c.output("47+63.sbt.zip")) - cmd = 'multigather --query {} --db 47+63.sbt.zip -k 31 
--threshold-bp=0'.format(lca_db) - cmd = cmd.split(' ') + cmd = f"multigather --query {lca_db} --db 47+63.sbt.zip -k 31 --threshold-bp=0" + cmd = cmd.split(" ") c.run_sourmash(*cmd) out = c.last_result.out @@ -4136,21 +4885,22 @@ def test_multigather_metagenome_query_with_lca(c): err = c.last_result.err print(err) - assert 'conducted gather searches on 2 signatures' in err - assert 'the recovered matches hit 100.0% of the query' in out -# assert '5.1 Mbp 100.0% 64.9% 491c0a81' in out - assert '5.5 Mbp 100.0% 69.4% 491c0a81' in out + assert "conducted gather searches on 2 signatures" in err + assert "the recovered matches hit 100.0% of the query" in out + # assert '5.1 Mbp 100.0% 64.9% 491c0a81' in out + assert "5.5 Mbp 100.0% 69.4% 491c0a81" in out @utils.in_tempdir def test_multigather_metagenome_query_on_lca_db(c): - - testdata_sig1 = utils.get_test_data('47.fa.sig') - testdata_sig2 = utils.get_test_data('63.fa.sig') - lca_db = utils.get_test_data('lca/47+63.lca.json') - - cmd = 'multigather --query {} {} --db {} -k 31 --threshold-bp=0'.format(testdata_sig1, testdata_sig2, lca_db) - cmd = cmd.split(' ') + testdata_sig1 = utils.get_test_data("47.fa.sig") + testdata_sig2 = utils.get_test_data("63.fa.sig") + lca_db = utils.get_test_data("lca/47+63.lca.json") + + cmd = "multigather --query {} {} --db {} -k 31 --threshold-bp=0".format( + testdata_sig1, testdata_sig2, lca_db + ) + cmd = cmd.split(" ") c.run_sourmash(*cmd) out = c.last_result.out @@ -4158,33 +4908,44 @@ def test_multigather_metagenome_query_on_lca_db(c): err = c.last_result.err print(err) - assert 'conducted gather searches on 2 signatures' in err - assert 'the recovered matches hit 100.0% of the query' in out - assert all(('5.1 Mbp 100.0% 100.0%' in out, - 'NC_009665.1 Shewanella baltica OS185,' in out)) - assert all(('5.5 Mbp 100.0% 100.0%' in out, - 'NC_011663.1 Shewanella baltica OS223,' in out)) + assert "conducted gather searches on 2 signatures" in err + assert "the recovered matches hit 100.0% of the query" in out + assert all( + ( + "5.1 Mbp 100.0% 100.0%" in out, + "NC_009665.1 Shewanella baltica OS185," in out, + ) + ) + assert all( + ( + "5.5 Mbp 100.0% 100.0%" in out, + "NC_011663.1 Shewanella baltica OS223," in out, + ) + ) @utils.in_tempdir def test_multigather_metagenome_query_with_sbt_addl_query(c): - - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all.sbt.zip'] + cmd = ["index", "gcf_all.sbt.zip"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) c.run_sourmash(*cmd) - assert os.path.exists(c.output('gcf_all.sbt.zip')) + assert os.path.exists(c.output("gcf_all.sbt.zip")) - another_query = utils.get_test_data('gather/GCF_000195995.1_ASM19599v1_genomic.fna.gz.sig') + another_query = utils.get_test_data( + "gather/GCF_000195995.1_ASM19599v1_genomic.fna.gz.sig" + ) - cmd = 'multigather --query {} gcf_all.sbt.zip --db gcf_all.sbt.zip -k 21 --threshold-bp=0'.format(another_query) - cmd = cmd.split(' ') + cmd = "multigather --query {} gcf_all.sbt.zip --db gcf_all.sbt.zip -k 21 --threshold-bp=0".format( + another_query + ) + cmd = cmd.split(" ") c.run_sourmash(*cmd) out = c.last_result.out @@ -4192,47 +4953,70 @@ def test_multigather_metagenome_query_with_sbt_addl_query(c): err = c.last_result.err print(err) - assert 'conducted gather searches on 13 signatures' 
in err - assert 'the recovered matches hit 100.0% of the query' in out - #check for matches to some of the sbt signatures - assert all(('4.7 Mbp 100.0% 100.0%' in out, - 'NC_011080.1 Salmonella enterica subsp' in out)) - assert all(('4.5 Mbp 100.0% 100.0%' in out, - 'NC_004631.1 Salmonella enterica subsp' in out)) - assert all (('1.6 Mbp 100.0% 100.0%' in out, - 'NC_002163.1 Campylobacter jejuni subs' in out)) - assert all(('1.9 Mbp 100.0% 100.0%' in out, - 'NC_000853.1 Thermotoga maritima MSB8 ' in out)) - - #check additional query sig - assert all(('4.9 Mbp 100.0% 100.0%' in out, - 'NC_003198.1 Salmonella enterica subsp' in out)) + assert "conducted gather searches on 13 signatures" in err + assert "the recovered matches hit 100.0% of the query" in out + # check for matches to some of the sbt signatures + assert all( + ( + "4.7 Mbp 100.0% 100.0%" in out, + "NC_011080.1 Salmonella enterica subsp" in out, + ) + ) + assert all( + ( + "4.5 Mbp 100.0% 100.0%" in out, + "NC_004631.1 Salmonella enterica subsp" in out, + ) + ) + assert all( + ( + "1.6 Mbp 100.0% 100.0%" in out, + "NC_002163.1 Campylobacter jejuni subs" in out, + ) + ) + assert all( + ( + "1.9 Mbp 100.0% 100.0%" in out, + "NC_000853.1 Thermotoga maritima MSB8 " in out, + ) + ) + + # check additional query sig + assert all( + ( + "4.9 Mbp 100.0% 100.0%" in out, + "NC_003198.1 Salmonella enterica subsp" in out, + ) + ) @utils.in_tempdir def test_multigather_metagenome_sbt_query_from_file_with_addl_query(c): - - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all.sbt.zip'] + cmd = ["index", "gcf_all.sbt.zip"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) c.run_sourmash(*cmd) - assert os.path.exists(c.output('gcf_all.sbt.zip')) + assert os.path.exists(c.output("gcf_all.sbt.zip")) # make list w/query sbt - query_list = c.output('query.list') - with open(query_list, 'wt') as fp: - print('gcf_all.sbt.zip', file=fp) - - another_query = utils.get_test_data('gather/GCF_000195995.1_ASM19599v1_genomic.fna.gz.sig') - - cmd = 'multigather --query {} --query-from-file {} --db gcf_all.sbt.zip -k 21 --threshold-bp=0'.format(another_query, query_list) - cmd = cmd.split(' ') + query_list = c.output("query.list") + with open(query_list, "w") as fp: + print("gcf_all.sbt.zip", file=fp) + + another_query = utils.get_test_data( + "gather/GCF_000195995.1_ASM19599v1_genomic.fna.gz.sig" + ) + + cmd = "multigather --query {} --query-from-file {} --db gcf_all.sbt.zip -k 21 --threshold-bp=0".format( + another_query, query_list + ) + cmd = cmd.split(" ") c.run_sourmash(*cmd) out = c.last_result.out @@ -4240,43 +5024,62 @@ def test_multigather_metagenome_sbt_query_from_file_with_addl_query(c): err = c.last_result.err print(err) - assert 'conducted gather searches on 13 signatures' in err - assert 'the recovered matches hit 100.0% of the query' in out - #check for matches to some of the sbt signatures - assert all(('4.7 Mbp 100.0% 100.0%' in out, - 'NC_011080.1 Salmonella enterica subsp' in out)) - assert all(('4.5 Mbp 100.0% 100.0%' in out, - 'NC_004631.1 Salmonella enterica subsp' in out)) - assert all (('1.6 Mbp 100.0% 100.0%' in out, - 'NC_002163.1 Campylobacter jejuni subs' in out)) - assert all(('1.9 Mbp 100.0% 100.0%' in out, - 'NC_000853.1 Thermotoga maritima MSB8 ' in out)) - - #check 
additional query sig - assert all(('4.9 Mbp 100.0% 100.0%' in out, - 'NC_003198.1 Salmonella enterica subsp' in out)) + assert "conducted gather searches on 13 signatures" in err + assert "the recovered matches hit 100.0% of the query" in out + # check for matches to some of the sbt signatures + assert all( + ( + "4.7 Mbp 100.0% 100.0%" in out, + "NC_011080.1 Salmonella enterica subsp" in out, + ) + ) + assert all( + ( + "4.5 Mbp 100.0% 100.0%" in out, + "NC_004631.1 Salmonella enterica subsp" in out, + ) + ) + assert all( + ( + "1.6 Mbp 100.0% 100.0%" in out, + "NC_002163.1 Campylobacter jejuni subs" in out, + ) + ) + assert all( + ( + "1.9 Mbp 100.0% 100.0%" in out, + "NC_000853.1 Thermotoga maritima MSB8 " in out, + ) + ) + + # check additional query sig + assert all( + ( + "4.9 Mbp 100.0% 100.0%" in out, + "NC_003198.1 Salmonella enterica subsp" in out, + ) + ) @utils.in_tempdir def test_multigather_metagenome_sbt_query_from_file_incorrect(c): - - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all.sbt.zip'] + cmd = ["index", "gcf_all.sbt.zip"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) c.run_sourmash(*cmd) - assert os.path.exists(c.output('gcf_all.sbt.zip')) + assert os.path.exists(c.output("gcf_all.sbt.zip")) # incorrectly query with sbt using `--query-from-file` - cmd = 'multigather --query-from-file gcf_all.sbt.zip --db gcf_all.sbt.zip -k 21 --threshold-bp=0' - cmd = cmd.split(' ') + cmd = "multigather --query-from-file gcf_all.sbt.zip --db gcf_all.sbt.zip -k 21 --threshold-bp=0" + cmd = cmd.split(" ") - with pytest.raises(SourmashCommandFailed) as e: + with pytest.raises(SourmashCommandFailed): c.run_sourmash(*cmd) print(c.last_result.out) @@ -4285,25 +5088,27 @@ def test_multigather_metagenome_sbt_query_from_file_incorrect(c): @utils.in_tempdir def test_multigather_metagenome_lca_query_from_file(c): - testdata_glob = utils.get_test_data('47*.fa.sig') + testdata_glob = utils.get_test_data("47*.fa.sig") testdata_sigs = glob.glob(testdata_glob) - lca_db = utils.get_test_data('lca/47+63.lca.json') + lca_db = utils.get_test_data("lca/47+63.lca.json") - cmd = ['index', '47+63.sbt.zip'] + cmd = ["index", "47+63.sbt.zip"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '31']) + cmd.extend(["-k", "31"]) c.run_sourmash(*cmd) - assert os.path.exists(c.output('47+63.sbt.zip')) + assert os.path.exists(c.output("47+63.sbt.zip")) # make list w/query sig - query_list = c.output('query.list') - with open(query_list, 'wt') as fp: + query_list = c.output("query.list") + with open(query_list, "w") as fp: print(lca_db, file=fp) - cmd = 'multigather --query-from-file {} --db 47+63.sbt.zip -k 31 --threshold-bp=0'.format(query_list) - cmd = cmd.split(' ') + cmd = "multigather --query-from-file {} --db 47+63.sbt.zip -k 31 --threshold-bp=0".format( + query_list + ) + cmd = cmd.split(" ") c.run_sourmash(*cmd) out = c.last_result.out @@ -4311,36 +5116,40 @@ def test_multigather_metagenome_lca_query_from_file(c): err = c.last_result.err print(err) - assert 'conducted gather searches on 2 signatures' in err - assert 'the recovered matches hit 100.0% of the query' in out -# assert '5.1 Mbp 100.0% 64.9% 491c0a81' in out - assert '5.5 Mbp 100.0% 69.4% 491c0a81' in out + assert "conducted gather searches on 2 signatures" in err + assert "the recovered 
matches hit 100.0% of the query" in out + # assert '5.1 Mbp 100.0% 64.9% 491c0a81' in out + assert "5.5 Mbp 100.0% 69.4% 491c0a81" in out @utils.in_tempdir def test_multigather_metagenome_query_from_file_with_addl_query(c): # test multigather --query-from-file and --query too - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) c.run_sourmash(*cmd) - assert os.path.exists(c.output('gcf_all.sbt.zip')) + assert os.path.exists(c.output("gcf_all.sbt.zip")) # make list w/query sig - query_list = c.output('query.list') - with open(query_list, 'wt') as fp: + query_list = c.output("query.list") + with open(query_list, "w") as fp: print(query_sig, file=fp) - another_query = utils.get_test_data('gather/GCF_000195995.1_ASM19599v1_genomic.fna.gz.sig') + another_query = utils.get_test_data( + "gather/GCF_000195995.1_ASM19599v1_genomic.fna.gz.sig" + ) - cmd = 'multigather --query-from-file {} --query {} --db gcf_all -k 21 --threshold-bp=0'.format(query_list, another_query) - cmd = cmd.split(' ') + cmd = "multigather --query-from-file {} --query {} --db gcf_all -k 21 --threshold-bp=0".format( + query_list, another_query + ) + cmd = cmd.split(" ") c.run_sourmash(*cmd) out = c.last_result.out @@ -4349,42 +5158,67 @@ def test_multigather_metagenome_query_from_file_with_addl_query(c): print(err) # first gather query - assert 'found 12 matches total' in out - assert 'the recovered matches hit 100.0% of the query' in out - assert all(('4.9 Mbp 33.2% 100.0%' in out, - 'NC_003198.1 Salmonella enterica subsp' in out)) - assert all(('4.7 Mbp 0.5% 1.5%' in out, - 'NC_011294.1 Salmonella enterica subsp' in out)) + assert "found 12 matches total" in out + assert "the recovered matches hit 100.0% of the query" in out + assert all( + ( + "4.9 Mbp 33.2% 100.0%" in out, + "NC_003198.1 Salmonella enterica subsp" in out, + ) + ) + assert all( + ( + "4.7 Mbp 0.5% 1.5%" in out, + "NC_011294.1 Salmonella enterica subsp" in out, + ) + ) # second gather query - assert '4.9 Mbp 100.0% 100.0% NC_003198.1 Salmonella enterica subsp' in out - assert 'found 1 matches total;' in out - assert 'the recovered matches hit 100.0% of the query' in out + assert "4.9 Mbp 100.0% 100.0% NC_003198.1 Salmonella enterica subsp" in out + assert "found 1 matches total;" in out + assert "the recovered matches hit 100.0% of the query" in out def test_gather_metagenome_traverse(runtmp, linear_gather, prefetch_gather): # set up a directory $location/gather that contains # everything in the 'tests/test-data/gather' directory # *except* the query sequence, which is 'combined.sig'. 
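# (Aside -- a sketch, not part of the patch: handing `sourmash gather` a
# directory makes it traverse and load every signature file inside, much as
# if each .sig were loaded by hand. Paths and ksize below are illustrative.)

import glob

import sourmash

found_sigs = [
    sourmash.load_one_signature(path, ksize=21)  # one sketch per .sig file
    for path in glob.glob("somesigs/*.sig")  # hypothetical copied directory
]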
- testdata_dir = utils.get_test_data('gather') - copy_testdata = runtmp.output('somesigs') + testdata_dir = utils.get_test_data("gather") + copy_testdata = runtmp.output("somesigs") shutil.copytree(testdata_dir, copy_testdata) - os.unlink(os.path.join(copy_testdata, 'combined.sig')) + os.unlink(os.path.join(copy_testdata, "combined.sig")) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") # now, feed in the new directory -- - runtmp.sourmash('gather', query_sig, copy_testdata, '-k', '21', '--threshold-bp=0', linear_gather, prefetch_gather) + runtmp.sourmash( + "gather", + query_sig, + copy_testdata, + "-k", + "21", + "--threshold-bp=0", + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 12 matches total' in runtmp.last_result.out - assert 'the recovered matches hit 100.0% of the query' in runtmp.last_result.out - assert all(('4.9 Mbp 33.2% 100.0%' in runtmp.last_result.out, - 'NC_003198.1 Salmonella enterica subsp' in runtmp.last_result.out)) - assert all(('4.7 Mbp 0.5% 1.5%' in runtmp.last_result.out, - 'NC_011294.1 Salmonella enterica subsp' in runtmp.last_result.out)) + assert "found 12 matches total" in runtmp.last_result.out + assert "the recovered matches hit 100.0% of the query" in runtmp.last_result.out + assert all( + ( + "4.9 Mbp 33.2% 100.0%" in runtmp.last_result.out, + "NC_003198.1 Salmonella enterica subsp" in runtmp.last_result.out, + ) + ) + assert all( + ( + "4.7 Mbp 0.5% 1.5%" in runtmp.last_result.out, + "NC_011294.1 Salmonella enterica subsp" in runtmp.last_result.out, + ) + ) def test_gather_metagenome_traverse_check_csv(runtmp, linear_gather, prefetch_gather): @@ -4394,300 +5228,483 @@ def test_gather_metagenome_traverse_check_csv(runtmp, linear_gather, prefetch_ga # set up a directory $location/gather that contains # everything in the 'tests/test-data/gather' directory # *except* the query sequence, which is 'combined.sig'. 
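# (Aside -- a sketch, not part of the patch: the CSV check below depends on
# gather writing, per row, the full path each match was loaded from. A
# standalone version of that provenance check; paths here are hypothetical.)

import csv

with open("out.csv", newline="") as fp:  # hypothetical `gather -o` output
    for row in csv.DictReader(fp):
        # every match should be traceable back to the file it was loaded from
        assert row["filename"].startswith("/tmp/somesigs")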
- testdata_dir = utils.get_test_data('gather') - copy_testdata = runtmp.output('somesigs') + testdata_dir = utils.get_test_data("gather") + copy_testdata = runtmp.output("somesigs") shutil.copytree(testdata_dir, copy_testdata) - os.unlink(os.path.join(copy_testdata, 'combined.sig')) + os.unlink(os.path.join(copy_testdata, "combined.sig")) - query_sig = utils.get_test_data('gather/combined.sig') - out_csv = runtmp.output('out.csv') + query_sig = utils.get_test_data("gather/combined.sig") + out_csv = runtmp.output("out.csv") # now, feed in the new directory -- - runtmp.sourmash('gather', query_sig, copy_testdata, '-k', '21', '--threshold-bp=0', '-o', out_csv, linear_gather, prefetch_gather) + runtmp.sourmash( + "gather", + query_sig, + copy_testdata, + "-k", + "21", + "--threshold-bp=0", + "-o", + out_csv, + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - with open(out_csv, 'rt') as fp: + with open(out_csv) as fp: prefix_len = len(copy_testdata) r = csv.DictReader(fp) for row in r: - filename = row['filename'] + filename = row["filename"] assert filename.startswith(copy_testdata), filename # should have full path to file sig was loaded from assert len(filename) > prefix_len - assert 'found 12 matches total' in runtmp.last_result.out - assert 'the recovered matches hit 100.0% of the query' in runtmp.last_result.out - assert all(('4.9 Mbp 33.2% 100.0%' in runtmp.last_result.out, - 'NC_003198.1 Salmonella enterica subsp' in runtmp.last_result.out)) - assert all(('4.7 Mbp 0.5% 1.5%' in runtmp.last_result.out, - 'NC_011294.1 Salmonella enterica subsp' in runtmp.last_result.out)) + assert "found 12 matches total" in runtmp.last_result.out + assert "the recovered matches hit 100.0% of the query" in runtmp.last_result.out + assert all( + ( + "4.9 Mbp 33.2% 100.0%" in runtmp.last_result.out, + "NC_003198.1 Salmonella enterica subsp" in runtmp.last_result.out, + ) + ) + assert all( + ( + "4.7 Mbp 0.5% 1.5%" in runtmp.last_result.out, + "NC_011294.1 Salmonella enterica subsp" in runtmp.last_result.out, + ) + ) @utils.in_tempdir def test_gather_traverse_incompatible(c): - searchdir = c.output('searchme') + searchdir = c.output("searchme") os.mkdir(searchdir) - num_sig = utils.get_test_data('num/47.fa.sig') - scaled_sig = utils.get_test_data('47.fa.sig') - shutil.copyfile(num_sig, c.output('searchme/num.sig')) - shutil.copyfile(scaled_sig, c.output('searchme/scaled.sig')) + num_sig = utils.get_test_data("num/47.fa.sig") + scaled_sig = utils.get_test_data("47.fa.sig") + shutil.copyfile(num_sig, c.output("searchme/num.sig")) + shutil.copyfile(scaled_sig, c.output("searchme/scaled.sig")) - c.run_sourmash("gather", scaled_sig, c.output('searchme')) + c.run_sourmash("gather", scaled_sig, c.output("searchme")) print(c.last_result.out) print(c.last_result.err) - assert "5.2 Mbp 100.0% 100.0% NC_009665.1 Shewanella baltica OS185," in c.last_result.out + assert ( + "5.2 Mbp 100.0% 100.0% NC_009665.1 Shewanella baltica OS185," + in c.last_result.out + ) def test_gather_metagenome_output_unassigned(runtmp): - testdata_glob = utils.get_test_data('gather/GCF_000195995*g') + testdata_glob = utils.get_test_data("gather/GCF_000195995*g") testdata_sigs = glob.glob(testdata_glob)[0] - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - runtmp.sourmash('gather', query_sig, testdata_sigs, '-k', '21', '--output-unassigned=unassigned.sig') + runtmp.sourmash( + "gather", + query_sig, + testdata_sigs, + "-k", + 
"21", + "--output-unassigned=unassigned.sig", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 1 matches total' in runtmp.last_result.out - assert 'the recovered matches hit 33.2% of the query' in runtmp.last_result.out - assert all(('4.9 Mbp 33.2% 100.0%' in runtmp.last_result.out, - 'NC_003198.1 Salmonella enterica subsp' in runtmp.last_result.out)) + assert "found 1 matches total" in runtmp.last_result.out + assert "the recovered matches hit 33.2% of the query" in runtmp.last_result.out + assert all( + ( + "4.9 Mbp 33.2% 100.0%" in runtmp.last_result.out, + "NC_003198.1 Salmonella enterica subsp" in runtmp.last_result.out, + ) + ) # now examine unassigned - testdata2_glob = utils.get_test_data('gather/GCF_000009505.1*.sig') + testdata2_glob = utils.get_test_data("gather/GCF_000009505.1*.sig") testdata2_sigs = glob.glob(testdata2_glob)[0] - runtmp.sourmash('gather', 'unassigned.sig', testdata_sigs, testdata2_sigs, '-k', '21') + runtmp.sourmash( + "gather", "unassigned.sig", testdata_sigs, testdata2_sigs, "-k", "21" + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert all(('1.3 Mbp 13.6% 28.2%' in runtmp.last_result.out, - 'NC_011294.1' in runtmp.last_result.out)) + assert all( + ( + "1.3 Mbp 13.6% 28.2%" in runtmp.last_result.out, + "NC_011294.1" in runtmp.last_result.out, + ) + ) def test_gather_metagenome_output_unassigned_as_zip(runtmp): - testdata_glob = utils.get_test_data('gather/GCF_000195995*g') + testdata_glob = utils.get_test_data("gather/GCF_000195995*g") testdata_sigs = glob.glob(testdata_glob)[0] - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - runtmp.sourmash('gather', query_sig, testdata_sigs, '-k', '21', '--output-unassigned=unassigned.sig.zip') + runtmp.sourmash( + "gather", + query_sig, + testdata_sigs, + "-k", + "21", + "--output-unassigned=unassigned.sig.zip", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 1 matches total' in runtmp.last_result.out - assert 'the recovered matches hit 33.2% of the query' in runtmp.last_result.out - assert all(('4.9 Mbp 33.2% 100.0%' in runtmp.last_result.out, - 'NC_003198.1 Salmonella enterica subsp' in runtmp.last_result.out)) + assert "found 1 matches total" in runtmp.last_result.out + assert "the recovered matches hit 33.2% of the query" in runtmp.last_result.out + assert all( + ( + "4.9 Mbp 33.2% 100.0%" in runtmp.last_result.out, + "NC_003198.1 Salmonella enterica subsp" in runtmp.last_result.out, + ) + ) - assert zipfile.is_zipfile(runtmp.output('unassigned.sig.zip')) + assert zipfile.is_zipfile(runtmp.output("unassigned.sig.zip")) # now examine unassigned - testdata2_glob = utils.get_test_data('gather/GCF_000009505.1*.sig') + testdata2_glob = utils.get_test_data("gather/GCF_000009505.1*.sig") testdata2_sigs = glob.glob(testdata2_glob)[0] - runtmp.sourmash('gather', 'unassigned.sig.zip', testdata_sigs, testdata2_sigs, '-k', '21') + runtmp.sourmash( + "gather", "unassigned.sig.zip", testdata_sigs, testdata2_sigs, "-k", "21" + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert all(('1.3 Mbp 13.6% 28.2%' in runtmp.last_result.out, - 'NC_011294.1' in runtmp.last_result.out)) + assert all( + ( + "1.3 Mbp 13.6% 28.2%" in runtmp.last_result.out, + "NC_011294.1" in runtmp.last_result.out, + ) + ) def test_gather_metagenome_output_unassigned_none(runtmp): # test what happens when there's nothing unassigned to output - testdata_glob = 
utils.get_test_data('gather/GCF_*.sig') + testdata_glob = utils.get_test_data("gather/GCF_*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - runtmp.sourmash('gather', query_sig, *testdata_sigs, '-k', '21', '--output-unassigned=unassigned.sig', '--threshold=0') + runtmp.sourmash( + "gather", + query_sig, + *testdata_sigs, + "-k", + "21", + "--output-unassigned=unassigned.sig", + "--threshold=0", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 12 matches total' in runtmp.last_result.out - assert 'the recovered matches hit 100.0% of the query' in runtmp.last_result.out - assert all(('4.9 Mbp 33.2% 100.0%' in runtmp.last_result.out, - 'NC_003198.1 Salmonella enterica subsp' in runtmp.last_result.out)) - assert all(('4.5 Mbp 0.1% 0.4%' in runtmp.last_result.out, - 'NC_004631.1 Salmonella enterica subsp' in runtmp.last_result.out)) + assert "found 12 matches total" in runtmp.last_result.out + assert "the recovered matches hit 100.0% of the query" in runtmp.last_result.out + assert all( + ( + "4.9 Mbp 33.2% 100.0%" in runtmp.last_result.out, + "NC_003198.1 Salmonella enterica subsp" in runtmp.last_result.out, + ) + ) + assert all( + ( + "4.5 Mbp 0.1% 0.4%" in runtmp.last_result.out, + "NC_004631.1 Salmonella enterica subsp" in runtmp.last_result.out, + ) + ) # now examine unassigned - assert not os.path.exists(runtmp.output('unassigned.sig')) - assert 'no unassigned hashes to save with --output-unassigned!' in runtmp.last_result.err + assert not os.path.exists(runtmp.output("unassigned.sig")) + assert ( + "no unassigned hashes to save with --output-unassigned!" + in runtmp.last_result.err + ) -def test_gather_metagenome_output_unassigned_nomatches(runtmp, prefetch_gather, linear_gather): +def test_gather_metagenome_output_unassigned_nomatches( + runtmp, prefetch_gather, linear_gather +): c = runtmp # test --output-unassigned when there are no matches - query_sig = utils.get_test_data('2.fa.sig') - against_sig = utils.get_test_data('47.fa.sig') - - c.run_sourmash('gather', query_sig, against_sig, - '--output-unassigned', 'foo.sig', linear_gather, - prefetch_gather) + query_sig = utils.get_test_data("2.fa.sig") + against_sig = utils.get_test_data("47.fa.sig") + + c.run_sourmash( + "gather", + query_sig, + against_sig, + "--output-unassigned", + "foo.sig", + linear_gather, + prefetch_gather, + ) print(c.last_result.out) assert "No matches found for --threshold-bp at 50.0 kbp." 
in c.last_result.err x = sourmash.load_one_signature(query_sig, ksize=31) - y = sourmash.load_one_signature(c.output('foo.sig')) + y = sourmash.load_one_signature(c.output("foo.sig")) assert x.minhash == y.minhash -def test_gather_metagenome_output_unassigned_nomatches_protein(runtmp, linear_gather, prefetch_gather): +def test_gather_metagenome_output_unassigned_nomatches_protein( + runtmp, linear_gather, prefetch_gather +): c = runtmp # test --output-unassigned with protein signatures - query_sig = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') - against_sig = utils.get_test_data('prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig') - - c.run_sourmash('gather', query_sig, against_sig, - '--output-unassigned', 'foo.sig', linear_gather, - prefetch_gather) + query_sig = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) + against_sig = utils.get_test_data( + "prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" + ) + + c.run_sourmash( + "gather", + query_sig, + against_sig, + "--output-unassigned", + "foo.sig", + linear_gather, + prefetch_gather, + ) print(c.last_result.out) assert "No matches found for --threshold-bp at 50.0 kbp." in c.last_result.err - c.run_sourmash('sig', 'describe', c.output('foo.sig')) + c.run_sourmash("sig", "describe", c.output("foo.sig")) print(c.last_result.out) x = sourmash.load_one_signature(query_sig, ksize=57) - y = sourmash.load_one_signature(c.output('foo.sig')) + y = sourmash.load_one_signature(c.output("foo.sig")) assert x.minhash == y.minhash assert y.minhash.moltype == "protein" def test_gather_check_scaled_bounds_negative(runtmp, prefetch_gather, linear_gather): - testdata_glob = utils.get_test_data('gather/GCF*.sig') - testdata_sigs = glob.glob(testdata_glob) + testdata_glob = utils.get_test_data("gather/GCF*.sig") + glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('gather', query_sig, prefetch_gather, linear_gather, 'gcf_all', '-k', '21', '--scaled', '-5', '--threshold-bp', '50000') + runtmp.sourmash( + "gather", + query_sig, + prefetch_gather, + linear_gather, + "gcf_all", + "-k", + "21", + "--scaled", + "-5", + "--threshold-bp", + "50000", + ) assert "ERROR: scaled value must be positive" in runtmp.last_result.err -def test_gather_check_scaled_bounds_less_than_minimum(runtmp, prefetch_gather, linear_gather): - testdata_glob = utils.get_test_data('gather/GCF*.sig') - testdata_sigs = glob.glob(testdata_glob) +def test_gather_check_scaled_bounds_less_than_minimum( + runtmp, prefetch_gather, linear_gather +): + testdata_glob = utils.get_test_data("gather/GCF*.sig") + glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('gather', query_sig, prefetch_gather, linear_gather, 'gcf_all', '-k', '21', '--scaled', '50', '--threshold-bp', '50000') - - assert "WARNING: scaled value should be >= 100. Continuing anyway." 
in runtmp.last_result.err - - -def test_gather_check_scaled_bounds_more_than_maximum(runtmp, prefetch_gather, linear_gather): - testdata_glob = utils.get_test_data('gather/GCF*.sig') - testdata_sigs = glob.glob(testdata_glob) - - query_sig = utils.get_test_data('gather/combined.sig') + runtmp.sourmash( + "gather", + query_sig, + prefetch_gather, + linear_gather, + "gcf_all", + "-k", + "21", + "--scaled", + "50", + "--threshold-bp", + "50000", + ) + + assert ( + "WARNING: scaled value should be >= 100. Continuing anyway." + in runtmp.last_result.err + ) + + +def test_gather_check_scaled_bounds_more_than_maximum( + runtmp, prefetch_gather, linear_gather +): + testdata_glob = utils.get_test_data("gather/GCF*.sig") + glob.glob(testdata_glob) + + query_sig = utils.get_test_data("gather/combined.sig") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('gather', query_sig, prefetch_gather, linear_gather, '-k', '21', '--scaled', '1e9', '--threshold-bp', '50000') - - assert "WARNING: scaled value should be <= 1e6. Continuing anyway." in runtmp.last_result.err + runtmp.sourmash( + "gather", + query_sig, + prefetch_gather, + linear_gather, + "-k", + "21", + "--scaled", + "1e9", + "--threshold-bp", + "50000", + ) + + assert ( + "WARNING: scaled value should be <= 1e6. Continuing anyway." + in runtmp.last_result.err + ) def test_gather_metagenome_downsample(runtmp, prefetch_gather, linear_gather): # downsample w/scaled of 100,000 - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) runtmp.sourmash(*cmd) - assert os.path.exists(runtmp.output('gcf_all.sbt.zip')) - - runtmp.sourmash('gather', query_sig, 'gcf_all', '-k', '21', '--scaled', '100000', prefetch_gather, linear_gather, '--threshold-bp', '50000') + assert os.path.exists(runtmp.output("gcf_all.sbt.zip")) + + runtmp.sourmash( + "gather", + query_sig, + "gcf_all", + "-k", + "21", + "--scaled", + "100000", + prefetch_gather, + linear_gather, + "--threshold-bp", + "50000", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 11 matches total' in runtmp.last_result.out - assert 'the recovered matches hit 100.0% of the query' in runtmp.last_result.out - assert all(('5.2 Mbp 32.9% 100.0%' in runtmp.last_result.out, - 'NC_003198.1' in runtmp.last_result.out)) - assert all(('4.1 Mbp 0.6% 2.4%' in runtmp.last_result.out, - '4.1 Mbp 4.4% 17.1%' in runtmp.last_result.out)) + assert "found 11 matches total" in runtmp.last_result.out + assert "the recovered matches hit 100.0% of the query" in runtmp.last_result.out + assert all( + ( + "5.2 Mbp 32.9% 100.0%" in runtmp.last_result.out, + "NC_003198.1" in runtmp.last_result.out, + ) + ) + assert all( + ( + "4.1 Mbp 0.6% 2.4%" in runtmp.last_result.out, + "4.1 Mbp 4.4% 17.1%" in runtmp.last_result.out, + ) + ) def test_gather_query_downsample(runtmp, linear_gather, prefetch_gather): - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) print(testdata_sigs) - query_sig = utils.get_test_data('GCF_000006945.2-s500.sig') + query_sig = utils.get_test_data("GCF_000006945.2-s500.sig") - runtmp.sourmash('gather', '-k', '31', linear_gather, 
prefetch_gather, query_sig, *testdata_sigs) + runtmp.sourmash( + "gather", "-k", "31", linear_gather, prefetch_gather, query_sig, *testdata_sigs + ) print(runtmp.last_result.out) print(runtmp.last_result.err) err = runtmp.last_result.err - assert 'loaded 36 total signatures from 12 locations.' in err - assert 'after selecting signatures compatible with search, 12 remain.' in err + assert "loaded 36 total signatures from 12 locations." in err + assert "after selecting signatures compatible with search, 12 remain." in err - assert all(('4.9 Mbp 100.0% 100.0%' in runtmp.last_result.out, - 'NC_003197.2' in runtmp.last_result.out)) + assert all( + ( + "4.9 Mbp 100.0% 100.0%" in runtmp.last_result.out, + "NC_003197.2" in runtmp.last_result.out, + ) + ) - assert 'WARNING: final scaled was 10000, vs query scaled of 500' in runtmp.last_result.out + assert ( + "WARNING: final scaled was 10000, vs query scaled of 500" + in runtmp.last_result.out + ) def test_gather_query_downsample_explicit(runtmp, linear_gather, prefetch_gather): # do an explicit downsampling to fix `test_gather_query_downsample` - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('GCF_000006945.2-s500.sig') + query_sig = utils.get_test_data("GCF_000006945.2-s500.sig") - runtmp.sourmash('gather', '-k', '31', '--scaled', '10000', linear_gather, prefetch_gather, query_sig, *testdata_sigs) + runtmp.sourmash( + "gather", + "-k", + "31", + "--scaled", + "10000", + linear_gather, + prefetch_gather, + query_sig, + *testdata_sigs, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) err = runtmp.last_result.err - assert 'loaded 36 total signatures from 12 locations.' in err - assert 'after selecting signatures compatible with search, 12 remain.' in err + assert "loaded 36 total signatures from 12 locations." in err + assert "after selecting signatures compatible with search, 12 remain." in err - assert all(('4.9 Mbp 100.0% 100.0%' in runtmp.last_result.out, - 'NC_003197.2' in runtmp.last_result.out)) + assert all( + ( + "4.9 Mbp 100.0% 100.0%" in runtmp.last_result.out, + "NC_003197.2" in runtmp.last_result.out, + ) + ) def test_gather_downsample_multiple(runtmp, linear_gather, prefetch_gather): # test multiple different downsamplings in gather code - query_sig = utils.get_test_data('GCF_000006945.2-s500.sig') + query_sig = utils.get_test_data("GCF_000006945.2-s500.sig") # load in the hashes and split them into four bins, randomly.
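# (Aside -- a sketch, not part of the patch, of the downsampling this test
# forces: the query is scaled=500, the search side ends up coarser, and
# gather must unify them. Assumes sourmash's MinHash.downsample API; the
# path is illustrative.)

import sourmash

sig = sourmash.load_one_signature("GCF_000006945.2-s500.sig")  # scaled=500 query
mh_coarse = sig.minhash.downsample(scaled=1000)  # keep only hashes below the coarser cutoff
assert set(mh_coarse.hashes).issubset(set(sig.minhash.hashes))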
ss = sourmash.load_one_signature(query_sig) hashes = list(ss.minhash.hashes) - random.seed(a=1) # fix seed so test is reproducible + random.seed(a=1) # fix seed so test is reproducible random.shuffle(hashes) # split into 4 bins: - mh_bins = [ ss.minhash.copy_and_clear() for i in range(4) ] + mh_bins = [ss.minhash.copy_and_clear() for i in range(4)] for i, hashval in enumerate(hashes): mh_bins[i % 4].add_hash(hashval) @@ -4706,23 +5723,37 @@ def test_gather_downsample_multiple(runtmp, linear_gather, prefetch_gather): gathersigs.append(f"bin{i}.sig") - runtmp.sourmash('gather', '-k', '31', linear_gather, prefetch_gather, query_sig, *gathersigs) + runtmp.sourmash( + "gather", "-k", "31", linear_gather, prefetch_gather, query_sig, *gathersigs + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert "WARNING: final scaled was 1000, vs query scaled of 500" in runtmp.last_result.out + assert ( + "WARNING: final scaled was 1000, vs query scaled of 500" + in runtmp.last_result.out + ) def test_gather_with_picklist(runtmp, linear_gather, prefetch_gather): # test 'sourmash gather' with picklists - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') - - runtmp.sourmash('gather', metag_sig, *gcf_sigs, '--threshold-bp=0', - '-k', '21', '--picklist', f"{picklist}:md5:md5", - linear_gather, prefetch_gather) + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") + + runtmp.sourmash( + "gather", + metag_sig, + *gcf_sigs, + "--threshold-bp=0", + "-k", + "21", + "--picklist", + f"{picklist}:md5:md5", + linear_gather, + prefetch_gather, + ) err = runtmp.last_result.err print(err) @@ -4740,13 +5771,22 @@ def test_gather_with_picklist(runtmp, linear_gather, prefetch_gather): def test_gather_with_picklist_exclude(runtmp, linear_gather, prefetch_gather): # test 'sourmash gather' with picklists - exclude - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') - - runtmp.sourmash('gather', metag_sig, *gcf_sigs, '--threshold-bp=0', - '-k', '21', '--picklist', f"{picklist}:md5:md5:exclude", - linear_gather, prefetch_gather) + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") + + runtmp.sourmash( + "gather", + metag_sig, + *gcf_sigs, + "--threshold-bp=0", + "-k", + "21", + "--picklist", + f"{picklist}:md5:md5:exclude", + linear_gather, + prefetch_gather, + ) err = runtmp.last_result.err print(err) @@ -4769,12 +5809,21 @@ def test_gather_with_picklist_exclude(runtmp, linear_gather, prefetch_gather): def test_gather_with_pattern_include(runtmp, linear_gather, prefetch_gather): # test 'sourmash gather' with --include-db-pattern - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - - runtmp.sourmash('gather', metag_sig, *gcf_sigs, '--threshold-bp=0', - '-k', '21', '--include', "thermotoga", - linear_gather, prefetch_gather) + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + + runtmp.sourmash( + "gather", + metag_sig, + *gcf_sigs, + 
"--threshold-bp=0", + "-k", + "21", + "--include", + "thermotoga", + linear_gather, + prefetch_gather, + ) err = runtmp.last_result.err print(err) @@ -4789,12 +5838,21 @@ def test_gather_with_pattern_include(runtmp, linear_gather, prefetch_gather): def test_gather_with_pattern_exclude(runtmp, linear_gather, prefetch_gather): # test 'sourmash gather' with --exclude - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - - runtmp.sourmash('gather', metag_sig, *gcf_sigs, '--threshold-bp=0', - '-k', '21', '--exclude', "thermotoga", - linear_gather, prefetch_gather) + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + + runtmp.sourmash( + "gather", + metag_sig, + *gcf_sigs, + "--threshold-bp=0", + "-k", + "21", + "--exclude", + "thermotoga", + linear_gather, + prefetch_gather, + ) err = runtmp.last_result.err print(err) @@ -4814,53 +5872,78 @@ def test_gather_with_pattern_exclude(runtmp, linear_gather, prefetch_gather): def test_gather_save_matches(runtmp, linear_gather, prefetch_gather): - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) runtmp.sourmash(*cmd) - assert os.path.exists(runtmp.output('gcf_all.sbt.zip')) - - runtmp.sourmash('gather', query_sig, 'gcf_all', '-k', '21', '--save-matches', 'save.sigs', linear_gather, prefetch_gather, '--threshold-bp', '0') + assert os.path.exists(runtmp.output("gcf_all.sbt.zip")) + + runtmp.sourmash( + "gather", + query_sig, + "gcf_all", + "-k", + "21", + "--save-matches", + "save.sigs", + linear_gather, + prefetch_gather, + "--threshold-bp", + "0", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 12 matches total' in runtmp.last_result.out - assert 'the recovered matches hit 100.0% of the query' in runtmp.last_result.out - assert os.path.exists(runtmp.output('save.sigs')) + assert "found 12 matches total" in runtmp.last_result.out + assert "the recovered matches hit 100.0% of the query" in runtmp.last_result.out + assert os.path.exists(runtmp.output("save.sigs")) def test_gather_save_matches_and_save_prefetch(runtmp, linear_gather): - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - query_sig = utils.get_test_data('gather/combined.sig') + query_sig = utils.get_test_data("gather/combined.sig") - cmd = ['index', 'gcf_all'] + cmd = ["index", "gcf_all"] cmd.extend(testdata_sigs) - cmd.extend(['-k', '21']) + cmd.extend(["-k", "21"]) runtmp.sourmash(*cmd) - assert os.path.exists(runtmp.output('gcf_all.sbt.zip')) - - runtmp.sourmash('gather', query_sig, 'gcf_all', '-k', '21', '--save-matches', 'save.sigs', '--save-prefetch', 'save2.sigs', linear_gather, '--threshold-bp', '0') + assert os.path.exists(runtmp.output("gcf_all.sbt.zip")) + + runtmp.sourmash( + "gather", + query_sig, + "gcf_all", + "-k", + "21", + "--save-matches", + "save.sigs", + "--save-prefetch", + "save2.sigs", + linear_gather, + "--threshold-bp", + "0", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 12 matches total' in runtmp.last_result.out - 
assert 'the recovered matches hit 100.0% of the query' in runtmp.last_result.out + assert "found 12 matches total" in runtmp.last_result.out + assert "the recovered matches hit 100.0% of the query" in runtmp.last_result.out - matches_save = runtmp.output('save.sigs') - prefetch_save = runtmp.output('save2.sigs') + matches_save = runtmp.output("save.sigs") + prefetch_save = runtmp.output("save2.sigs") assert os.path.exists(matches_save) assert os.path.exists(prefetch_save) @@ -4873,12 +5956,14 @@ def test_gather_save_matches_and_save_prefetch(runtmp, linear_gather): @utils.in_tempdir def test_gather_error_no_sigs_traverse(c): # test gather applied to a directory - query = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') + query = utils.get_test_data( + "prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig" + ) - emptydir = c.output('') + emptydir = c.output("") - with pytest.raises(SourmashCommandFailed) as e: - c.run_sourmash('gather', query, emptydir) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("gather", query, emptydir) err = c.last_result.err print(err) @@ -4886,65 +5971,85 @@ def test_gather_error_no_sigs_traverse(c): def test_gather_error_no_cardinality_query(runtmp, linear_gather, prefetch_gather): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'k=31,num=500', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=31,num=500", testdata1, testdata2) - testdata3 = utils.get_test_data('short3.fa') + testdata3 = utils.get_test_data("short3.fa") - runtmp.sourmash('sketch', 'translate', '-p', 'k=31,num=500', testdata3) + runtmp.sourmash("sketch", "translate", "-p", "k=31,num=500", testdata3) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "zzz", "short.fa.sig", "short2.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('gather', 'short3.fa.sig', 'zzz', linear_gather, prefetch_gather) + runtmp.sourmash( + "gather", "short3.fa.sig", "zzz", linear_gather, prefetch_gather + ) assert runtmp.last_result.status == -1 assert "query signature needs to be created with --scaled" in runtmp.last_result.err def test_gather_deduce_ksize(runtmp, prefetch_gather, linear_gather): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'dna', '-p', 'k=23,scaled=10', testdata1, testdata2) + runtmp.sourmash("sketch", "dna", "-p", "k=23,scaled=10", testdata1, testdata2) - runtmp.sourmash('sketch','dna','-p','k=23,scaled=10', '-o', 'query.fa.sig', testdata2) + runtmp.sourmash( + "sketch", "dna", "-p", "k=23,scaled=10", "-o", "query.fa.sig", testdata2 + ) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "zzz", "short.fa.sig", "short2.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('gather', 'query.fa.sig', 'zzz', prefetch_gather, linear_gather, '--threshold-bp=1') + runtmp.sourmash( + "gather", + "query.fa.sig", + "zzz", + prefetch_gather, + linear_gather, + "--threshold-bp=1", + ) print(runtmp.last_result.out) 
print(runtmp.last_result.err) - assert '0.9 kbp 100.0% 100.0%' in runtmp.last_result.out + assert "0.9 kbp 100.0% 100.0%" in runtmp.last_result.out def test_gather_deduce_moltype(runtmp, linear_gather, prefetch_gather): # gather should automatically figure out ksize - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") - runtmp.sourmash('sketch', 'translate', '-p', 'k=10,scaled=10', testdata1,testdata2) + runtmp.sourmash("sketch", "translate", "-p", "k=10,scaled=10", testdata1, testdata2) - runtmp.sourmash('sketch', 'translate', '-p', 'k=10,scaled=10', '-o', 'query.fa.sig',testdata2) + runtmp.sourmash( + "sketch", "translate", "-p", "k=10,scaled=10", "-o", "query.fa.sig", testdata2 + ) - runtmp.sourmash('index', 'zzz', 'short.fa.sig', 'short2.fa.sig') + runtmp.sourmash("index", "zzz", "short.fa.sig", "short2.fa.sig") - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('gather', 'query.fa.sig', 'zzz', linear_gather, prefetch_gather, '--threshold-bp=1') + runtmp.sourmash( + "gather", + "query.fa.sig", + "zzz", + linear_gather, + prefetch_gather, + "--threshold-bp=1", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert '1.9 kbp 100.0% 100.0%' in runtmp.last_result.out + assert "1.9 kbp 100.0% 100.0%" in runtmp.last_result.out def test_gather_abund_1_1(runtmp, linear_gather, prefetch_gather): @@ -4966,14 +6071,14 @@ def test_gather_abund_1_1(runtmp, linear_gather, prefetch_gather): # ./sourmash compute -k 21 --scaled 1000 --merge=1-1 -o reads-s10-s11.sig r[13].fa --track-abundance # ./sourmash compute -k 21 --scaled 1000 --merge=10-1 -o reads-s10x10-s11.sig r[23].fa --track-abundance - query = utils.get_test_data('gather-abund/reads-s10-s11.sig') - against_list = ['genome-s10', 'genome-s11', 'genome-s12'] - against_list = ['gather-abund/' + i + '.fa.gz.sig' - for i in against_list] + query = utils.get_test_data("gather-abund/reads-s10-s11.sig") + against_list = ["genome-s10", "genome-s11", "genome-s12"] + against_list = ["gather-abund/" + i + ".fa.gz.sig" for i in against_list] against_list = [utils.get_test_data(i) for i in against_list] - status, out, err = c.run_sourmash('gather', query, *against_list, - linear_gather, prefetch_gather) + status, out, err = c.run_sourmash( + "gather", query, *against_list, linear_gather, prefetch_gather + ) print(out) print(err) @@ -4985,9 +6090,9 @@ def test_gather_abund_1_1(runtmp, linear_gather, prefetch_gather): # (this is due to the low coverage of 2 used to build queries) # * approximately 2.0 abundance (third column, avg_abund) - assert '49.6% 78.5% 1.8 tests/test-data/genome-s10.fa.gz' in out - assert '50.4% 80.0% 1.9 tests/test-data/genome-s11.fa.gz' in out - assert 'genome-s12.fa.gz' not in out + assert "49.6% 78.5% 1.8 tests/test-data/genome-s10.fa.gz" in out + assert "50.4% 80.0% 1.9 tests/test-data/genome-s11.fa.gz" in out + assert "genome-s12.fa.gz" not in out assert "the recovered matches hit 100.0% of the abundance-weighted query" in out assert "the recovered matches hit 100.0% of the query k-mers (unweighted)" in out @@ -5003,15 +6108,14 @@ def test_gather_abund_10_1(runtmp, prefetch_gather, linear_gather): # ./sourmash compute -k 21 --scaled 1000 --merge=1-1 -o reads-s10-s11.sig r[13].fa --track-abundance # ./sourmash compute -k 21 --scaled 1000 --merge=10-1 -o reads-s10x10-s11.sig r[23].fa --track-abundance - query = 
utils.get_test_data('gather-abund/reads-s10x10-s11.sig') - against_list = ['genome-s10', 'genome-s11', 'genome-s12'] - against_list = ['gather-abund/' + i + '.fa.gz.sig' - for i in against_list] + query = utils.get_test_data("gather-abund/reads-s10x10-s11.sig") + against_list = ["genome-s10", "genome-s11", "genome-s12"] + against_list = ["gather-abund/" + i + ".fa.gz.sig" for i in against_list] against_list = [utils.get_test_data(i) for i in against_list] - status, out, err = c.run_sourmash('gather', query, '-o', 'xxx.csv', - *against_list, linear_gather, - prefetch_gather) + status, out, err = c.run_sourmash( + "gather", query, "-o", "xxx.csv", *against_list, linear_gather, prefetch_gather + ) print(out) print(err) @@ -5025,14 +6129,14 @@ def test_gather_abund_10_1(runtmp, prefetch_gather, linear_gather): # * approximately 2.0 abundance (third column, avg_abund) for s11, # and (very) approximately 20x abundance for genome s10. - assert '91.0% 100.0% 14.5 tests/test-data/genome-s10.fa.gz' in out - assert '9.0% 80.0% 1.9 tests/test-data/genome-s11.fa.gz' in out - assert 'genome-s12.fa.gz' not in out + assert "91.0% 100.0% 14.5 tests/test-data/genome-s10.fa.gz" in out + assert "9.0% 80.0% 1.9 tests/test-data/genome-s11.fa.gz" in out + assert "genome-s12.fa.gz" not in out assert "the recovered matches hit 100.0% of the abundance-weighted query" in out # check the calculations behind the above output by looking into # the CSV. - with open(c.output('xxx.csv'), 'rt') as fp: + with open(c.output("xxx.csv")) as fp: r = csv.DictReader(fp) overlaps = [] @@ -5046,14 +6150,14 @@ def test_gather_abund_10_1(runtmp, prefetch_gather, linear_gather): total_weighted_list = [] for n, row in enumerate(r): - assert int(row['gather_result_rank']) == n + assert int(row["gather_result_rank"]) == n # other than f_weighted, these are all 'flat' numbers - no abunds. - overlap = float(row['intersect_bp']) - remaining_bp = float(row['remaining_bp']) - unique_overlap = float(row['unique_intersect_bp']) - f_weighted = float(row['f_unique_weighted']) - average_abund = float(row['average_abund']) + overlap = float(row["intersect_bp"]) + remaining_bp = float(row["remaining_bp"]) + unique_overlap = float(row["unique_intersect_bp"]) + f_weighted = float(row["f_unique_weighted"]) + average_abund = float(row["average_abund"]) overlaps.append(overlap) unique_overlaps.append(unique_overlap) @@ -5062,14 +6166,14 @@ def test_gather_abund_10_1(runtmp, prefetch_gather, linear_gather): remaining_bps.append(remaining_bp) # also track weighted calculations - n_weighted_list.append(float(row['n_unique_weighted_found'])) - sum_weighted_list.append(float(row['sum_weighted_found'])) - total_weighted_list.append(float(row['total_weighted_hashes'])) + n_weighted_list.append(float(row["n_unique_weighted_found"])) + sum_weighted_list.append(float(row["sum_weighted_found"])) + total_weighted_list.append(float(row["total_weighted_hashes"])) weighted_calc = [] - for (overlap, average_abund) in zip(overlaps, average_abunds): - prod = overlap*average_abund - weighted_calc.append(prod) # @CTB redundant terms with below? + for overlap, average_abund in zip(overlaps, average_abunds): + prod = overlap * average_abund + weighted_calc.append(prod) # @CTB redundant terms with below? 
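    # Each prod above is intersect_bp * average_abund, an estimate of the
    # abundance-weighted bases covered by that match; the loop below compares
    # these products, normalized by their sum, against the reported
    # f_unique_weighted values (hence the question about redundant terms).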
total_weighted = sum(weighted_calc) for prod, f_weighted in zip(weighted_calc, f_weighted_list): @@ -5103,6 +6207,7 @@ def test_gather_abund_10_1(runtmp, prefetch_gather, linear_gather): f_weighted = f_weighted_list[i] assert f_weighted == n_weighted / 7986 + def test_gather_abund_10_1_ignore_abundance(runtmp, linear_gather, prefetch_gather): # check gather with an abundance-weighted query, then flattened with # --ignore-abund @@ -5115,18 +6220,21 @@ def test_gather_abund_10_1_ignore_abundance(runtmp, linear_gather, prefetch_gath # ./sourmash compute -k 21 --scaled 1000 --merge=1-1 -o reads-s10-s11.sig r[13].fa --track-abundance # ./sourmash compute -k 21 --scaled 1000 --merge=10-1 -o reads-s10x10-s11.sig r[23].fa --track-abundance - query = utils.get_test_data('gather-abund/reads-s10x10-s11.sig') - against_list = ['genome-s10', 'genome-s11', 'genome-s12'] - against_list = ['gather-abund/' + i + '.fa.gz.sig' - for i in against_list] + query = utils.get_test_data("gather-abund/reads-s10x10-s11.sig") + against_list = ["genome-s10", "genome-s11", "genome-s12"] + against_list = ["gather-abund/" + i + ".fa.gz.sig" for i in against_list] against_list = [utils.get_test_data(i) for i in against_list] - status, out, err = c.run_sourmash('gather', query, - '--ignore-abundance', - *against_list, - linear_gather, prefetch_gather, - '-o', c.output('results.csv')) - + status, out, err = c.run_sourmash( + "gather", + query, + "--ignore-abundance", + *against_list, + linear_gather, + prefetch_gather, + "-o", + c.output("results.csv"), + ) print(out) print(err) @@ -5139,38 +6247,47 @@ def test_gather_abund_10_1_ignore_abundance(runtmp, linear_gather, prefetch_gath # * approximately 100% of the high coverage genome being matched, # with only 80% of the low coverage genome - assert all(('57.2% 100.0%', 'tests/test-data/genome-s10.fa.gz' in out)) - assert all(('42.8% 80.0%', 'tests/test-data/genome-s11.fa.gz' in out)) - assert 'genome-s12.fa.gz' not in out + assert all(("57.2% 100.0%", "tests/test-data/genome-s10.fa.gz" in out)) + assert all(("42.8% 80.0%", "tests/test-data/genome-s11.fa.gz" in out)) + assert "genome-s12.fa.gz" not in out - with open(c.output('results.csv'), 'rt') as fp: + with open(c.output("results.csv")) as fp: r = csv.DictReader(fp) some_results = False for row in r: some_results = True - assert row['average_abund'] == '' - assert row['median_abund'] == '' - assert row['std_abund'] == '' + assert row["average_abund"] == "" + assert row["median_abund"] == "" + assert row["std_abund"] == "" - assert row['query_abundance'] == 'False', row['query_abundance'] - assert row['n_unique_weighted_found'] == '' + assert row["query_abundance"] == "False", row["query_abundance"] + assert row["n_unique_weighted_found"] == "" assert some_results -def test_gather_output_unassigned_with_abundance(runtmp, prefetch_gather, linear_gather): +def test_gather_output_unassigned_with_abundance( + runtmp, prefetch_gather, linear_gather +): # check --output-unassigned with an abund query # @CTB: could add check on sum weighted etc. 
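    # One possible shape for that check, as a hypothetical sketch (not run
    # here): it assumes adding "-o", c.output("results.csv") to the gather
    # call below, and reuses the CSV columns exercised in
    # test_gather_abund_10_1 plus the module-level csv import:
    #
    #   with open(c.output("results.csv")) as fp:
    #       rows = list(csv.DictReader(fp))
    #   # the cumulative weighted hashes found can never exceed the total:
    #   assert all(
    #       float(row["sum_weighted_found"]) <= float(row["total_weighted_hashes"])
    #       for row in rows
    #   )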
c = runtmp - query = utils.get_test_data('gather-abund/reads-s10x10-s11.sig') - against = utils.get_test_data('gather-abund/genome-s10.fa.gz.sig') - - c.run_sourmash('gather', query, against, '--output-unassigned', - c.output('unassigned.sig'), linear_gather, prefetch_gather) - - assert os.path.exists(c.output('unassigned.sig')) - - nomatch = sourmash.load_one_signature(c.output('unassigned.sig')) + query = utils.get_test_data("gather-abund/reads-s10x10-s11.sig") + against = utils.get_test_data("gather-abund/genome-s10.fa.gz.sig") + + c.run_sourmash( + "gather", + query, + against, + "--output-unassigned", + c.output("unassigned.sig"), + linear_gather, + prefetch_gather, + ) + + assert os.path.exists(c.output("unassigned.sig")) + + nomatch = sourmash.load_one_signature(c.output("unassigned.sig")) assert nomatch.minhash.track_abundance query_ss = sourmash.load_one_signature(query) @@ -5189,14 +6306,21 @@ def test_gather_output_unassigned_with_abundance(runtmp, prefetch_gather, linear def test_gather_empty_db_fail(runtmp, linear_gather, prefetch_gather): # gather should fail on empty db with --fail-on-empty-database - query = utils.get_test_data('2.fa.sig') - against = utils.get_test_data('47.fa.sig') - against2 = utils.get_test_data('lca/47+63.lca.json') + query = utils.get_test_data("2.fa.sig") + against = utils.get_test_data("47.fa.sig") + against2 = utils.get_test_data("lca/47+63.lca.json") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('gather', query, against, against2, '-k', '51', - linear_gather, prefetch_gather) - + runtmp.sourmash( + "gather", + query, + against, + against2, + "-k", + "51", + linear_gather, + prefetch_gather, + ) err = runtmp.last_result.err assert "no compatible signatures found in " in err @@ -5204,13 +6328,21 @@ def test_gather_empty_db_fail(runtmp, linear_gather, prefetch_gather): def test_gather_empty_db_nofail(runtmp, prefetch_gather, linear_gather): # gather should not fail on empty db with --no-fail-on-empty-database - query = utils.get_test_data('2.fa.sig') - against = utils.get_test_data('47.fa.sig') - against2 = utils.get_test_data('lca/47+63.lca.json') - - runtmp.sourmash('gather', query, against, against2, '-k', '51', - '--no-fail-on-empty-data', - linear_gather, prefetch_gather) + query = utils.get_test_data("2.fa.sig") + against = utils.get_test_data("47.fa.sig") + against2 = utils.get_test_data("lca/47+63.lca.json") + + runtmp.sourmash( + "gather", + query, + against, + against2, + "-k", + "51", + "--no-fail-on-empty-data", + linear_gather, + prefetch_gather, + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -5218,16 +6350,20 @@ def test_gather_empty_db_nofail(runtmp, prefetch_gather, linear_gather): print(err) assert "no compatible signatures found in " in err - assert "ksize on this database is 31; this is different from requested ksize of 51" in err + assert ( + "ksize on this database is 31; this is different from requested ksize of 51" + in err + ) assert "loaded 50 total signatures from 2 locations" in err assert "after selecting signatures compatible with search, 0 remain." 
in err + def test_multigather_output_unassigned_with_abundance(runtmp): c = runtmp - query = utils.get_test_data('gather-abund/reads-s10x10-s11.sig') - against = utils.get_test_data('gather-abund/genome-s10.fa.gz.sig') + query = utils.get_test_data("gather-abund/reads-s10x10-s11.sig") + against = utils.get_test_data("gather-abund/genome-s10.fa.gz.sig") - cmd = 'multigather --query {} --db {}'.format(query, against).split() + cmd = f"multigather --query {query} --db {against}".split() c.run_sourmash(*cmd) print(c.last_result.out) @@ -5237,9 +6373,9 @@ def test_multigather_output_unassigned_with_abundance(runtmp): assert "the recovered matches hit 91.0% of the abundance-weighted query." in out assert "the recovered matches hit 57.2% of the query k-mers (unweighted)." in out - assert os.path.exists(c.output('r3.fa.unassigned.sig')) + assert os.path.exists(c.output("r3.fa.unassigned.sig")) - nomatch = sourmash.load_one_signature(c.output('r3.fa.unassigned.sig')) + nomatch = sourmash.load_one_signature(c.output("r3.fa.unassigned.sig")) assert nomatch.minhash.track_abundance query_ss = sourmash.load_one_signature(query) @@ -5258,13 +6394,14 @@ def test_multigather_output_unassigned_with_abundance(runtmp): def test_multigather_empty_db_fail(runtmp): # multigather should fail on empty db with --fail-on-empty-database - query = utils.get_test_data('2.fa.sig') - against = utils.get_test_data('47.fa.sig') - against2 = utils.get_test_data('lca/47+63.lca.json') + query = utils.get_test_data("2.fa.sig") + against = utils.get_test_data("47.fa.sig") + against2 = utils.get_test_data("lca/47+63.lca.json") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('multigather', '--query', query, - '--db', against, against2, '-k', '51') + runtmp.sourmash( + "multigather", "--query", query, "--db", against, against2, "-k", "51" + ) err = runtmp.last_result.err assert "no compatible signatures found in " in err @@ -5272,13 +6409,21 @@ def test_multigather_empty_db_fail(runtmp): def test_multigather_empty_db_nofail(runtmp): # multigather should not fail on empty db with --no-fail-on-empty-database - query = utils.get_test_data('2.fa.sig') - against = utils.get_test_data('47.fa.sig') - against2 = utils.get_test_data('lca/47+63.lca.json') - - runtmp.sourmash('multigather', '--query', query, - '--db', against, against2, '-k', '51', - '--no-fail-on-empty-data') + query = utils.get_test_data("2.fa.sig") + against = utils.get_test_data("47.fa.sig") + against2 = utils.get_test_data("lca/47+63.lca.json") + + runtmp.sourmash( + "multigather", + "--query", + query, + "--db", + against, + against2, + "-k", + "51", + "--no-fail-on-empty-data", + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -5286,7 +6431,10 @@ def test_multigather_empty_db_nofail(runtmp): print(err) assert "no compatible signatures found in " in err - assert "ksize on this database is 31; this is different from requested ksize of 51" in err + assert ( + "ksize on this database is 31; this is different from requested ksize of 51" + in err + ) assert "conducted gather searches on 0 signatures" in err assert "loaded 50 total signatures from 2 locations" in err assert "after selecting signatures compatible with search, 0 remain." 
in err @@ -5294,53 +6442,54 @@ def test_multigather_empty_db_nofail(runtmp): def test_multigather_nomatch(runtmp): testdata_query = utils.get_test_data( - 'gather/GCF_000006945.2_ASM694v2_genomic.fna.gz.sig') - testdata_match = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') + "gather/GCF_000006945.2_ASM694v2_genomic.fna.gz.sig" + ) + testdata_match = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") - runtmp.sourmash('multigather', '--query', testdata_query, - '--db', testdata_match, '-k', '31') + runtmp.sourmash( + "multigather", "--query", testdata_query, "--db", testdata_match, "-k", "31" + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 0 matches total' in runtmp.last_result.out - assert 'the recovered matches hit 0.0% of the query' in runtmp.last_result.out + assert "found 0 matches total" in runtmp.last_result.out + assert "the recovered matches hit 0.0% of the query" in runtmp.last_result.out def test_multigather_abund_nomatch(runtmp): - testdata_query = utils.get_test_data('gather-abund/reads-s10x10-s11.sig') - testdata_match = utils.get_test_data('gather/GCF_000006945.2_ASM694v2_genomic.fna.gz.sig') + testdata_query = utils.get_test_data("gather-abund/reads-s10x10-s11.sig") + testdata_match = utils.get_test_data( + "gather/GCF_000006945.2_ASM694v2_genomic.fna.gz.sig" + ) - runtmp.sourmash('multigather', '--query', testdata_query, - '--db', testdata_match) + runtmp.sourmash("multigather", "--query", testdata_query, "--db", testdata_match) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'found 0 matches total' in runtmp.last_result.out - assert 'the recovered matches hit 0.0% of the query' in runtmp.last_result.out + assert "found 0 matches total" in runtmp.last_result.out + assert "the recovered matches hit 0.0% of the query" in runtmp.last_result.out def test_sbt_categorize(runtmp): - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = utils.get_test_data('genome-s10+s11.sig') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") # all four in the current directory for categorize . 
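    # ("categorize zzz ." below classifies every signature found under the
    # given path, here the temp dir, against the index; that is why the four
    # test signatures are first copied in as 1.sig through 4.sig.)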
- shutil.copyfile(testdata1, runtmp.output('1.sig')) - shutil.copyfile(testdata2, runtmp.output('2.sig')) - shutil.copyfile(testdata3, runtmp.output('3.sig')) - shutil.copyfile(testdata4, runtmp.output('4.sig')) + shutil.copyfile(testdata1, runtmp.output("1.sig")) + shutil.copyfile(testdata2, runtmp.output("2.sig")) + shutil.copyfile(testdata3, runtmp.output("3.sig")) + shutil.copyfile(testdata4, runtmp.output("4.sig")) # omit 3 - args = ['index', '--dna', '-k', '21', 'zzz', '1.sig', '2.sig'] + args = ["index", "--dna", "-k", "21", "zzz", "1.sig", "2.sig"] runtmp.sourmash(*args) - # categorize all of the ones that were copied to 'location' - args = ['categorize', 'zzz', '.', - '--ksize', '21', '--dna', '--csv', 'out.csv'] + args = ["categorize", "zzz", ".", "--ksize", "21", "--dna", "--csv", "out.csv"] runtmp.sourmash(*args) print(runtmp.last_result.out) @@ -5348,27 +6497,34 @@ def test_sbt_categorize(runtmp): # mash dist genome-s10.fa.gz genome-s10+s11.fa.gz # yields 521/1000 ==> ~0.5 - assert 'for genome-s10+s11, found: 0.50 genome-s10' in runtmp.last_result.err + assert "for genome-s10+s11, found: 0.50 genome-s10" in runtmp.last_result.err - out_csv = Path(runtmp.output('out.csv')).read_text() + out_csv = Path(runtmp.output("out.csv")).read_text() print(out_csv) - assert '4.sig,genome-s10+s11,genome-s10,0.504' in out_csv + assert "4.sig,genome-s10+s11,genome-s10,0.504" in out_csv def test_sbt_categorize_ignore_abundance_1(runtmp): # --- Categorize without ignoring abundance --- - query = utils.get_test_data('gather-abund/reads-s10x10-s11.sig') - against_list = ['reads-s10-s11'] - against_list = ['gather-abund/' + i + '.sig' - for i in against_list] + query = utils.get_test_data("gather-abund/reads-s10x10-s11.sig") + against_list = ["reads-s10-s11"] + against_list = ["gather-abund/" + i + ".sig" for i in against_list] against_list = [utils.get_test_data(i) for i in against_list] # omit 3 - args = ['index', '--dna', '-k', '21', 'thebestdatabase'] + against_list + args = ["index", "--dna", "-k", "21", "thebestdatabase"] + against_list runtmp.sourmash(*args) - args = ['categorize', 'thebestdatabase', - '--ksize', '21', '--dna', '--csv', 'out3.csv', query] + args = [ + "categorize", + "thebestdatabase", + "--ksize", + "21", + "--dna", + "--csv", + "out3.csv", + query, + ] with pytest.raises(SourmashCommandFailed): runtmp.sourmash(*args) @@ -5377,120 +6533,138 @@ def test_sbt_categorize_ignore_abundance_1(runtmp): print(runtmp.last_result.out) print(runtmp.last_result.err) - assert "ERROR: this search cannot be done on signatures calculated with abundance." in runtmp.last_result.err + assert ( + "ERROR: this search cannot be done on signatures calculated with abundance." + in runtmp.last_result.err + ) assert "ERROR: please specify --ignore-abundance." 
in runtmp.last_result.err def test_sbt_categorize_ignore_abundance_3(runtmp): # --- Now categorize with ignored abundance --- - query = utils.get_test_data('gather-abund/reads-s10x10-s11.sig') - against_list = ['reads-s10-s11'] - against_list = ['gather-abund/' + i + '.sig' - for i in against_list] + query = utils.get_test_data("gather-abund/reads-s10x10-s11.sig") + against_list = ["reads-s10-s11"] + against_list = ["gather-abund/" + i + ".sig" for i in against_list] against_list = [utils.get_test_data(i) for i in against_list] # omit 3 - args = ['index', '--dna', '-k', '21', 'thebestdatabase'] + against_list + args = ["index", "--dna", "-k", "21", "thebestdatabase"] + against_list runtmp.sourmash(*args) - args = ['categorize', '--ignore-abundance', - '--ksize', '21', '--dna', '--csv', 'out4.csv', - 'thebestdatabase', query] + args = [ + "categorize", + "--ignore-abundance", + "--ksize", + "21", + "--dna", + "--csv", + "out4.csv", + "thebestdatabase", + query, + ] runtmp.sourmash(*args) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'for 1-1, found: 0.88 1-1' in runtmp.last_result.err + assert "for 1-1, found: 0.88 1-1" in runtmp.last_result.err - out_csv4 = Path(runtmp.output('out4.csv')).read_text() - assert 'reads-s10x10-s11.sig,1-1,1-1,0.87699' in out_csv4 + out_csv4 = Path(runtmp.output("out4.csv")).read_text() + assert "reads-s10x10-s11.sig,1-1,1-1,0.87699" in out_csv4 def test_sbt_categorize_already_done(runtmp): - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = utils.get_test_data('genome-s10+s11.sig') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") - shutil.copyfile(testdata1, runtmp.output('1.sig')) - shutil.copyfile(testdata2, runtmp.output('2.sig')) - shutil.copyfile(testdata3, runtmp.output('3.sig')) - shutil.copyfile(testdata4, runtmp.output('4.sig')) + shutil.copyfile(testdata1, runtmp.output("1.sig")) + shutil.copyfile(testdata2, runtmp.output("2.sig")) + shutil.copyfile(testdata3, runtmp.output("3.sig")) + shutil.copyfile(testdata4, runtmp.output("4.sig")) # omit 3 - args = ['index', '--dna', '-k', '21', 'zzz', '1.sig', '2.sig'] + args = ["index", "--dna", "-k", "21", "zzz", "1.sig", "2.sig"] runtmp.sourmash(*args) - with open(runtmp.output('in.csv'), 'wt') as fp: - fp.write('./4.sig,genome-s10.fa.gz,0.50') - - args = ['categorize', 'zzz', './2.sig', './4.sig', - '--ksize', '21', '--dna', '--load-csv', 'in.csv'] + with open(runtmp.output("in.csv"), "w") as fp: + fp.write("./4.sig,genome-s10.fa.gz,0.50") + + args = [ + "categorize", + "zzz", + "./2.sig", + "./4.sig", + "--ksize", + "21", + "--dna", + "--load-csv", + "in.csv", + ] runtmp.sourmash(*args) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'for genome-s11.fa.gz, no match found' - assert not 'for s10+s11, found: 0.50 genome-s10.fa.gz' in runtmp.last_result.err + assert "for genome-s11.fa.gz, no match found" + assert "for s10+s11, found: 0.50 genome-s10.fa.gz" not in runtmp.last_result.err def test_sbt_categorize_already_done_traverse(runtmp): - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') - testdata4 = 
utils.get_test_data('genome-s10+s11.sig') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") + testdata4 = utils.get_test_data("genome-s10+s11.sig") - shutil.copyfile(testdata1, runtmp.output('1.sig')) - shutil.copyfile(testdata2, runtmp.output('2.sig')) - shutil.copyfile(testdata3, runtmp.output('3.sig')) - shutil.copyfile(testdata4, runtmp.output('4.sig')) + shutil.copyfile(testdata1, runtmp.output("1.sig")) + shutil.copyfile(testdata2, runtmp.output("2.sig")) + shutil.copyfile(testdata3, runtmp.output("3.sig")) + shutil.copyfile(testdata4, runtmp.output("4.sig")) # omit 3 - args = ['index', '--dna', '-k', '21', 'zzz', '1.sig', '2.sig'] + args = ["index", "--dna", "-k", "21", "zzz", "1.sig", "2.sig"] runtmp.sourmash(*args) - with open(runtmp.output('in.csv'), 'wt') as fp: - fp.write('./4.sig,genome-s10.fa.gz,0.50') + with open(runtmp.output("in.csv"), "w") as fp: + fp.write("./4.sig,genome-s10.fa.gz,0.50") - args = ['categorize', 'zzz', '.', - '--ksize', '21', '--dna', '--load-csv', 'in.csv'] + args = ["categorize", "zzz", ".", "--ksize", "21", "--dna", "--load-csv", "in.csv"] runtmp.sourmash(*args) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'for genome-s11.fa.gz, no match found' - assert not 'for s10+s11, found: 0.50 genome-s10.fa.gz' in runtmp.last_result.err + assert "for genome-s11.fa.gz, no match found" + assert "for s10+s11, found: 0.50 genome-s10.fa.gz" not in runtmp.last_result.err def test_sbt_categorize_multiple_ksizes_moltypes(runtmp): # 'categorize' works fine with multiple moltypes/ksizes - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - testdata2 = utils.get_test_data('genome-s11.fa.gz.sig') - testdata3 = utils.get_test_data('genome-s12.fa.gz.sig') + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + testdata2 = utils.get_test_data("genome-s11.fa.gz.sig") + testdata3 = utils.get_test_data("genome-s12.fa.gz.sig") - shutil.copyfile(testdata1, runtmp.output('1.sig')) - shutil.copyfile(testdata2, runtmp.output('2.sig')) - shutil.copyfile(testdata3, runtmp.output('3.sig')) + shutil.copyfile(testdata1, runtmp.output("1.sig")) + shutil.copyfile(testdata2, runtmp.output("2.sig")) + shutil.copyfile(testdata3, runtmp.output("3.sig")) - args = ['index', '--dna', '-k', '21', 'zzz', '1.sig', '2.sig'] + args = ["index", "--dna", "-k", "21", "zzz", "1.sig", "2.sig"] runtmp.sourmash(*args) - args = ['categorize', 'zzz', '.'] + args = ["categorize", "zzz", "."] runtmp.sourmash(*args) def test_watch_check_num_bounds_negative(runtmp): # check that watch properly outputs error on negative num c = runtmp - testdata0 = utils.get_test_data('genome-s10.fa.gz') - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - shutil.copyfile(testdata1, c.output('1.sig')) + testdata0 = utils.get_test_data("genome-s10.fa.gz") + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + shutil.copyfile(testdata1, c.output("1.sig")) - c.run_sourmash('index', '--dna', '-k', '21', 'zzz', '1.sig') + c.run_sourmash("index", "--dna", "-k", "21", "zzz", "1.sig") - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('watch', '--ksize', '21', '-n', '-5', '--dna', 'zzz', testdata0) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("watch", "--ksize", "21", "-n", "-5", "--dna", "zzz", testdata0) assert "ERROR: num value must be positive" in c.last_result.err @@ -5498,13 +6672,13 @@ def 
test_watch_check_num_bounds_negative(runtmp): def test_watch_check_num_bounds_less_than_minimum(runtmp): # check that watch properly outputs warnings on small num c = runtmp - testdata0 = utils.get_test_data('genome-s10.fa.gz') - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - shutil.copyfile(testdata1, c.output('1.sig')) + testdata0 = utils.get_test_data("genome-s10.fa.gz") + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + shutil.copyfile(testdata1, c.output("1.sig")) - c.run_sourmash('index', '--dna', '-k', '21', 'zzz', '1.sig') + c.run_sourmash("index", "--dna", "-k", "21", "zzz", "1.sig") - c.run_sourmash('watch', '--ksize', '21', '-n', '25', '--dna', 'zzz', testdata0) + c.run_sourmash("watch", "--ksize", "21", "-n", "25", "--dna", "zzz", testdata0) assert "WARNING: num value should be >= 50. Continuing anyway." in c.last_result.err @@ -5512,113 +6686,124 @@ def test_watch_check_num_bounds_less_than_minimum(runtmp): def test_watch_check_num_bounds_more_than_maximum(runtmp): # check that watch properly outputs warnings on large num c = runtmp - testdata0 = utils.get_test_data('genome-s10.fa.gz') - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - shutil.copyfile(testdata1, c.output('1.sig')) + testdata0 = utils.get_test_data("genome-s10.fa.gz") + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + shutil.copyfile(testdata1, c.output("1.sig")) - c.run_sourmash('index', '--dna', '-k', '21', 'zzz', '1.sig') + c.run_sourmash("index", "--dna", "-k", "21", "zzz", "1.sig") - c.run_sourmash('watch', '--ksize', '21', '-n', '100000', '--dna', 'zzz', testdata0) + c.run_sourmash("watch", "--ksize", "21", "-n", "100000", "--dna", "zzz", testdata0) - assert "WARNING: num value should be <= 50000. Continuing anyway." in c.last_result.err + assert ( + "WARNING: num value should be <= 50000. Continuing anyway." 
in c.last_result.err + ) def test_watch(runtmp): # check basic watch functionality c = runtmp - testdata0 = utils.get_test_data('genome-s10.fa.gz') - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - shutil.copyfile(testdata1, c.output('1.sig')) + testdata0 = utils.get_test_data("genome-s10.fa.gz") + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + shutil.copyfile(testdata1, c.output("1.sig")) - c.run_sourmash('index', '--dna', '-k', '21', 'zzz', '1.sig') + c.run_sourmash("index", "--dna", "-k", "21", "zzz", "1.sig") - c.run_sourmash('watch', '--ksize', '21', '--dna', 'zzz', testdata0) + c.run_sourmash("watch", "--ksize", "21", "--dna", "zzz", testdata0) print(c.last_result.out) print(c.last_result.err) - assert 'FOUND: genome-s10, at 1.000' in c.last_result.out + assert "FOUND: genome-s10, at 1.000" in c.last_result.out def test_watch_deduce_ksize(runtmp): # check that watch guesses ksize automatically from database c = runtmp - testdata0 = utils.get_test_data('genome-s10.fa.gz') - c.run_sourmash('sketch','dna','-p','k=29,num=500', '-o', '1.sig', testdata0) + testdata0 = utils.get_test_data("genome-s10.fa.gz") + c.run_sourmash("sketch", "dna", "-p", "k=29,num=500", "-o", "1.sig", testdata0) - c.run_sourmash('index', '--dna', '-k', '29', 'zzz', '1.sig') + c.run_sourmash("index", "--dna", "-k", "29", "zzz", "1.sig") - c.run_sourmash('watch', '--dna', 'zzz', testdata0) + c.run_sourmash("watch", "--dna", "zzz", testdata0) print(c.last_result.out) print(c.last_result.err) - assert 'Computing signature for k=29' in c.last_result.err - assert 'genome-s10.fa.gz, at 1.000' in c.last_result.out + assert "Computing signature for k=29" in c.last_result.err + assert "genome-s10.fa.gz, at 1.000" in c.last_result.out def test_watch_coverage(runtmp): # check output details/coverage of found - testdata0 = utils.get_test_data('genome-s10.fa.gz') - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - shutil.copyfile(testdata1, runtmp.output('1.sig')) + testdata0 = utils.get_test_data("genome-s10.fa.gz") + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + shutil.copyfile(testdata1, runtmp.output("1.sig")) - args = ['index', '--dna', '-k', '21', 'zzz', '1.sig'] + args = ["index", "--dna", "-k", "21", "zzz", "1.sig"] runtmp.sourmash(*args) - with open(runtmp.output('query.fa'), 'wt') as fp: + with open(runtmp.output("query.fa"), "w") as fp: record = list(screed.open(testdata0))[0] for start in range(0, len(record), 100): - fp.write('>{}\n{}\n'.format(start, - record.sequence[start:start+500])) + fp.write(f">{start}\n{record.sequence[start : start + 500]}\n") - args = ['watch', '--ksize', '21', '--dna', 'zzz', 'query.fa'] + args = ["watch", "--ksize", "21", "--dna", "zzz", "query.fa"] runtmp.sourmash(*args) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'FOUND: genome-s10, at 1.000' in runtmp.last_result.out + assert "FOUND: genome-s10, at 1.000" in runtmp.last_result.out def test_watch_output_sig(runtmp): # test watch --output - testdata0 = utils.get_test_data('genome-s10.fa.gz') - testdata1 = utils.get_test_data('genome-s10.fa.gz.sig') - shutil.copyfile(testdata1, runtmp.output('1.sig')) + testdata0 = utils.get_test_data("genome-s10.fa.gz") + testdata1 = utils.get_test_data("genome-s10.fa.gz.sig") + shutil.copyfile(testdata1, runtmp.output("1.sig")) - args = ['index', '--dna', '-k', '21', 'zzz', '1.sig'] + args = ["index", "--dna", "-k", "21", "zzz", "1.sig"] runtmp.sourmash(*args) - with open(runtmp.output('query.fa'), 'wt') as fp: + with 
open(runtmp.output("query.fa"), "w") as fp: record = list(screed.open(testdata0))[0] for start in range(0, len(record), 100): - fp.write('>{}\n{}\n'.format(start, - record.sequence[start:start+500])) - - args = ['watch', '--ksize', '21', '--dna', 'zzz', 'query.fa', - '-o', 'out.sig', '--name', 'xyzfoo'] + fp.write(f">{start}\n{record.sequence[start : start + 500]}\n") + + args = [ + "watch", + "--ksize", + "21", + "--dna", + "zzz", + "query.fa", + "-o", + "out.sig", + "--name", + "xyzfoo", + ] runtmp.sourmash(*args) print(runtmp.last_result.out) print(runtmp.last_result.err) - out_sig = runtmp.output('out.sig') + out_sig = runtmp.output("out.sig") assert os.path.exists(out_sig) siglist = list(sourmash.load_file_as_signatures(out_sig)) assert len(siglist) == 1 - assert siglist[0].filename == 'stdin' - assert siglist[0].name == 'xyzfoo' + assert siglist[0].filename == "stdin" + assert siglist[0].name == "xyzfoo" def test_storage_convert(runtmp): - testdata = utils.get_test_data('v2.sbt.json') - shutil.copyfile(testdata, runtmp.output('v2.sbt.json')) - shutil.copytree(os.path.join(os.path.dirname(testdata), '.sbt.v2'), - runtmp.output('.sbt.v2')) - testsbt = runtmp.output('v2.sbt.json') + testdata = utils.get_test_data("v2.sbt.json") + shutil.copyfile(testdata, runtmp.output("v2.sbt.json")) + shutil.copytree( + os.path.join(os.path.dirname(testdata), ".sbt.v2"), runtmp.output(".sbt.v2") + ) + testsbt = runtmp.output("v2.sbt.json") original = SBT.load(testsbt, leaf_loader=SigLeaf.load) - args = ['storage', 'convert', '-b', 'ipfs', testsbt] + args = ["storage", "convert", "-b", "ipfs", testsbt] try: runtmp.sourmash(*args) except SourmashCommandFailed: @@ -5626,151 +6811,165 @@ def test_storage_convert(runtmp): if runtmp.last_result.status: if "ipfshttpclient.ConnectionError" in runtmp.last_result.err: - raise pytest.xfail('ipfs probably not running') + raise pytest.xfail("ipfs probably not running") if "No module named 'ipfshttpclient'" in runtmp.last_result.err: - raise pytest.xfail('ipfshttpclient module not installed') + raise pytest.xfail("ipfshttpclient module not installed") print("NO FAIL; KEEP ON GOING!") - ipfs = SBT.load(testsbt, leaf_loader=SigLeaf.load) assert len(original) == len(ipfs) - assert all(n1[1].name == n2[1].name - for (n1, n2) in zip(sorted(original), sorted(ipfs))) - - args = ['storage', 'convert', - '-b', """'ZipStorage("{}")'""".format( - runtmp.output('v2.sbt.zip')), - testsbt] + assert all( + n1[1].name == n2[1].name for (n1, n2) in zip(sorted(original), sorted(ipfs)) + ) + + args = [ + "storage", + "convert", + "-b", + """'ZipStorage("{}")'""".format(runtmp.output("v2.sbt.zip")), + testsbt, + ] runtmp.sourmash(*args) tar = SBT.load(testsbt, leaf_loader=SigLeaf.load) assert len(original) == len(tar) - assert all(n1[1].name == n2[1].name - for (n1, n2) in zip(sorted(original), sorted(tar))) + assert all( + n1[1].name == n2[1].name for (n1, n2) in zip(sorted(original), sorted(tar)) + ) print("it all worked!!") def test_storage_convert_identity(runtmp): - testdata = utils.get_test_data('v2.sbt.json') - shutil.copyfile(testdata, runtmp.output('v2.sbt.json')) - shutil.copytree(os.path.join(os.path.dirname(testdata), '.sbt.v2'), - runtmp.output('.sbt.v2')) - testsbt = runtmp.output('v2.sbt.json') + testdata = utils.get_test_data("v2.sbt.json") + shutil.copyfile(testdata, runtmp.output("v2.sbt.json")) + shutil.copytree( + os.path.join(os.path.dirname(testdata), ".sbt.v2"), runtmp.output(".sbt.v2") + ) + testsbt = runtmp.output("v2.sbt.json") original = SBT.load(testsbt, 
leaf_loader=SigLeaf.load) - args = ['storage', 'convert', '-b', 'fsstorage', testsbt] + args = ["storage", "convert", "-b", "fsstorage", testsbt] runtmp.sourmash(*args) identity = SBT.load(testsbt, leaf_loader=SigLeaf.load) assert len(original) == len(identity) - assert all(n1[1].name == n2[1].name - for (n1, n2) in zip(sorted(original), sorted(identity))) + assert all( + n1[1].name == n2[1].name for (n1, n2) in zip(sorted(original), sorted(identity)) + ) def test_storage_convert_fsstorage_newpath(runtmp): - testdata = utils.get_test_data('v2.sbt.json') - shutil.copyfile(testdata, runtmp.output('v2.sbt.json')) - shutil.copytree(os.path.join(os.path.dirname(testdata), '.sbt.v2'), - runtmp.output('.sbt.v2')) - testsbt = runtmp.output('v2.sbt.json') + testdata = utils.get_test_data("v2.sbt.json") + shutil.copyfile(testdata, runtmp.output("v2.sbt.json")) + shutil.copytree( + os.path.join(os.path.dirname(testdata), ".sbt.v2"), runtmp.output(".sbt.v2") + ) + testsbt = runtmp.output("v2.sbt.json") original = SBT.load(testsbt, leaf_loader=SigLeaf.load) - args = ['storage', 'convert', - '-b', 'fsstorage({})'.format(runtmp.output('v3')), - testsbt] + args = [ + "storage", + "convert", + "-b", + "fsstorage({})".format(runtmp.output("v3")), + testsbt, + ] runtmp.sourmash(*args) identity = SBT.load(testsbt, leaf_loader=SigLeaf.load) assert len(original) == len(identity) - assert all(n1[1].name == n2[1].name - for (n1, n2) in zip(sorted(original), sorted(identity))) + assert all( + n1[1].name == n2[1].name for (n1, n2) in zip(sorted(original), sorted(identity)) + ) def test_migrate(runtmp): - testdata = utils.get_test_data('v3.sbt.json') - shutil.copyfile(testdata, runtmp.output('v3.sbt.json')) - shutil.copytree(os.path.join(os.path.dirname(testdata), '.sbt.v3'), - runtmp.output('.sbt.v3')) - testsbt = runtmp.output('v3.sbt.json') + testdata = utils.get_test_data("v3.sbt.json") + shutil.copyfile(testdata, runtmp.output("v3.sbt.json")) + shutil.copytree( + os.path.join(os.path.dirname(testdata), ".sbt.v3"), runtmp.output(".sbt.v3") + ) + testsbt = runtmp.output("v3.sbt.json") original = SBT.load(testsbt, leaf_loader=SigLeaf.load) - runtmp.sourmash('migrate', testsbt) + runtmp.sourmash("migrate", testsbt) identity = SBT.load(testsbt, leaf_loader=SigLeaf.load) assert len(original) == len(identity) - assert all(n1[1].name == n2[1].name - for (n1, n2) in zip(sorted(original), - sorted(identity))) + assert all( + n1[1].name == n2[1].name for (n1, n2) in zip(sorted(original), sorted(identity)) + ) assert "this is an old index version" not in runtmp.last_result.err - assert all('min_n_below' in node.metadata - for node in identity - if isinstance(node, Node)) + assert all( + "min_n_below" in node.metadata for node in identity if isinstance(node, Node) + ) def test_license_cc0(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch','translate', '-p', 'k=31', testdata1) + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash("sketch", "translate", "-p", "k=31", testdata1) - sigfile = runtmp.output('short.fa.sig') + sigfile = runtmp.output("short.fa.sig") assert os.path.exists(sigfile) sig = next(signature.load_signatures(sigfile)) - assert str(sig).endswith('short.fa') + assert str(sig).endswith("short.fa") - assert sig.license == 'CC0' + assert sig.license == "CC0" def test_license_non_cc0(runtmp): - testdata1 = utils.get_test_data('short.fa') + testdata1 = utils.get_test_data("short.fa") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'translate', 
'-p','k=31', '--license', 'GPL', testdata1) + runtmp.sourmash( + "sketch", "translate", "-p", "k=31", "--license", "GPL", testdata1 + ) assert runtmp.last_result.status != 0 print(runtmp.last_result.out) print(runtmp.last_result.err) - assert 'sourmash only supports CC0' in runtmp.last_result.err + assert "sourmash only supports CC0" in runtmp.last_result.err def test_license_load_non_cc0(): - sigfile = utils.get_test_data('bad-license.sig') + sigfile = utils.get_test_data("bad-license.sig") try: - sig = next(signature.load_signatures(sigfile, do_raise=True)) + next(signature.load_signatures(sigfile, do_raise=True)) except Exception as e: assert "sourmash only supports CC0-licensed signatures" in str(e) @utils.in_tempdir def test_do_sourmash_index_zipfile(c): - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) - c.run_sourmash('index', '-k', '31', 'zzz.sbt.zip', - *testdata_sigs) + c.run_sourmash("index", "-k", "31", "zzz.sbt.zip", *testdata_sigs) - outfile = c.output('zzz.sbt.zip') + outfile = c.output("zzz.sbt.zip") assert os.path.exists(outfile) print(c) assert c.last_result.status == 0 - assert 'Finished saving SBT index, available at' in c.last_result.err + assert "Finished saving SBT index, available at" in c.last_result.err # look internally at the zip file with zipfile.ZipFile(outfile) as zf: content = zf.namelist() assert len(content) == 26 - assert len([c for c in content if 'internal' in c]) == 11 + assert len([c for c in content if "internal" in c]) == 11 assert ".sbt.zzz/" in content sbts = [c for c in content if c.endswith(".sbt.json")] assert len(sbts) == 1 @@ -5779,7 +6978,7 @@ def test_do_sourmash_index_zipfile(c): @utils.in_tempdir def test_do_sourmash_index_zipfile_append(c): - testdata_glob = utils.get_test_data('gather/GCF*.sig') + testdata_glob = utils.get_test_data("gather/GCF*.sig") testdata_sigs = glob.glob(testdata_glob) half_point = int(len(testdata_sigs) / 2) first_half = testdata_sigs[:half_point] @@ -5792,35 +6991,33 @@ def test_do_sourmash_index_zipfile_append(c): assert not set(first_half).intersection(set(second_half)) with warnings.catch_warnings(record=True) as record: - c.run_sourmash('index', '-k', '31', 'zzz.sbt.zip', - *first_half) + c.run_sourmash("index", "-k", "31", "zzz.sbt.zip", *first_half) # UserWarning is raised when there are duplicated entries in the zipfile assert not record, record - outfile = c.output('zzz.sbt.zip') + outfile = c.output("zzz.sbt.zip") assert os.path.exists(outfile) print(c) assert c.last_result.status == 0 - assert 'Finished saving SBT index, available at' in c.last_result.err + assert "Finished saving SBT index, available at" in c.last_result.err with warnings.catch_warnings(record=True) as record: - c.run_sourmash('index', "--append", '-k', '31', 'zzz.sbt.zip', - *second_half) + c.run_sourmash("index", "--append", "-k", "31", "zzz.sbt.zip", *second_half) # UserWarning is raised when there are duplicated entries in the zipfile print(record) - #assert not record, record + # assert not record, record print(c) assert c.last_result.status == 0 - assert 'Finished saving SBT index, available at' in c.last_result.err + assert "Finished saving SBT index, available at" in c.last_result.err # look internally at the zip file with zipfile.ZipFile(outfile) as zf: content = zf.namelist() print(content) assert len(content) == 26 - assert len([c for c in content if 'internal' in c]) == 11 + assert len([c for c in content if "internal" 
in c]) == 11 assert ".sbt.zzz/" in content sbts = [c for c in content if c.endswith(".sbt.json")] assert len(sbts) == 1 @@ -5829,13 +7026,14 @@ def test_do_sourmash_index_zipfile_append(c): def test_index_with_picklist(runtmp): # test 'sourmash index' with picklists - gcf_sig_dir = utils.get_test_data('gather/') - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') + gcf_sig_dir = utils.get_test_data("gather/") + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") - output_db = runtmp.output('thermo.sbt.zip') + output_db = runtmp.output("thermo.sbt.zip") - runtmp.sourmash('index', output_db, gcf_sig_dir, - '-k', '31', '--picklist', f"{picklist}:md5:md5") + runtmp.sourmash( + "index", output_db, gcf_sig_dir, "-k", "31", "--picklist", f"{picklist}:md5:md5" + ) err = runtmp.last_result.err print(err) @@ -5848,18 +7046,25 @@ def test_index_with_picklist(runtmp): siglist = list(sourmash.load_file_as_signatures(output_db)) assert len(siglist) == 3 for ss in siglist: - assert 'Thermotoga' in ss.name + assert "Thermotoga" in ss.name def test_index_with_picklist_exclude(runtmp): # test 'sourmash index' with picklists - exclude - gcf_sig_dir = utils.get_test_data('gather/') - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') + gcf_sig_dir = utils.get_test_data("gather/") + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") - output_db = runtmp.output('thermo-exclude.sbt.zip') + output_db = runtmp.output("thermo-exclude.sbt.zip") - runtmp.sourmash('index', output_db, gcf_sig_dir, - '-k', '31', '--picklist', f"{picklist}:md5:md5:exclude") + runtmp.sourmash( + "index", + output_db, + gcf_sig_dir, + "-k", + "31", + "--picklist", + f"{picklist}:md5:md5:exclude", + ) err = runtmp.last_result.err print(err) @@ -5869,35 +7074,43 @@ def test_index_with_picklist_exclude(runtmp): siglist = list(sourmash.load_file_as_signatures(output_db)) assert len(siglist) == 9 for ss in siglist: - assert 'Thermotoga' not in ss.name + assert "Thermotoga" not in ss.name def test_index_matches_search_with_picklist(runtmp): # test 'sourmash index' with picklists - gcf_sig_dir = utils.get_test_data('gather/') - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') - metag_sig = utils.get_test_data('gather/combined.sig') + gcf_sig_dir = utils.get_test_data("gather/") + glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") + metag_sig = utils.get_test_data("gather/combined.sig") - output_db = runtmp.output('thermo.sbt.zip') + output_db = runtmp.output("thermo.sbt.zip") - runtmp.sourmash('index', output_db, gcf_sig_dir, '-k', '21') + runtmp.sourmash("index", output_db, gcf_sig_dir, "-k", "21") print(runtmp.last_result.out) print(runtmp.last_result.err) # verify: siglist = list(sourmash.load_file_as_signatures(output_db)) - assert len(siglist) > 3 # all signatures included... + assert len(siglist) > 3 # all signatures included... 
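    # The index was built without the picklist, so every GCF signature is
    # present; the picklist is only applied at search time below, which
    # should narrow the reported matches to the three Thermotoga genomes.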
n_thermo = 0 for ss in siglist: - if 'Thermotoga' in ss.name: + if "Thermotoga" in ss.name: n_thermo += 1 assert n_thermo == 3 - runtmp.sourmash('search', metag_sig, output_db, '--containment', - '-k', '21', '--picklist', f"{picklist}:md5:md5") + runtmp.sourmash( + "search", + metag_sig, + output_db, + "--containment", + "-k", + "21", + "--picklist", + f"{picklist}:md5:md5", + ) err = runtmp.last_result.err print(err) @@ -5915,30 +7128,38 @@ def test_index_matches_search_with_picklist(runtmp): def test_index_matches_search_with_picklist_exclude(runtmp): # test 'sourmash index' with picklists - exclude - gcf_sig_dir = utils.get_test_data('gather/') - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - picklist = utils.get_test_data('gather/thermotoga-picklist.csv') - metag_sig = utils.get_test_data('gather/combined.sig') + gcf_sig_dir = utils.get_test_data("gather/") + glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/thermotoga-picklist.csv") + metag_sig = utils.get_test_data("gather/combined.sig") - output_db = runtmp.output('thermo-exclude.sbt.zip') + output_db = runtmp.output("thermo-exclude.sbt.zip") - runtmp.sourmash('index', output_db, gcf_sig_dir, '-k', '21') + runtmp.sourmash("index", output_db, gcf_sig_dir, "-k", "21") print(runtmp.last_result.out) print(runtmp.last_result.err) # verify: siglist = list(sourmash.load_file_as_signatures(output_db)) - assert len(siglist) > 3 # all signatures included... + assert len(siglist) > 3 # all signatures included... n_thermo = 0 for ss in siglist: - if 'Thermotoga' in ss.name: + if "Thermotoga" in ss.name: n_thermo += 1 assert n_thermo == 3 - runtmp.sourmash('search', metag_sig, output_db, '--containment', - '-k', '21', '--picklist', f"{picklist}:md5:md5:exclude") + runtmp.sourmash( + "search", + metag_sig, + output_db, + "--containment", + "-k", + "21", + "--picklist", + f"{picklist}:md5:md5:exclude", + ) err = runtmp.last_result.err print(err) @@ -5956,12 +7177,11 @@ def test_index_matches_search_with_picklist_exclude(runtmp): def test_gather_with_prefetch_picklist(runtmp, linear_gather): # test 'gather' using a picklist taken from 'sourmash prefetch' output - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - prefetch_csv = runtmp.output('prefetch-out.csv') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + prefetch_csv = runtmp.output("prefetch-out.csv") - runtmp.sourmash('prefetch', metag_sig, *gcf_sigs, - '-k', '21', '-o', prefetch_csv) + runtmp.sourmash("prefetch", metag_sig, *gcf_sigs, "-k", "21", "-o", prefetch_csv) err = runtmp.last_result.err print(err) @@ -5970,12 +7190,22 @@ def test_gather_with_prefetch_picklist(runtmp, linear_gather): print(out) assert "total of 12 matching signatures." in err - assert "of 1466 distinct query hashes, 1466 were found in matches above threshold." in err + assert ( + "of 1466 distinct query hashes, 1466 were found in matches above threshold." 
+ in err + ) # now, do a gather with the results - runtmp.sourmash('gather', metag_sig, *gcf_sigs, linear_gather, - '-k', '21', '--picklist', - f'{prefetch_csv}:match_md5:md5short') + runtmp.sourmash( + "gather", + metag_sig, + *gcf_sigs, + linear_gather, + "-k", + "21", + "--picklist", + f"{prefetch_csv}:match_md5:md5short", + ) err = runtmp.last_result.err print(err) @@ -5993,12 +7223,11 @@ def test_gather_with_prefetch_picklist(runtmp, linear_gather): def test_gather_with_prefetch_picklist_2_prefetch(runtmp, linear_gather): # test 'gather' using a picklist taken from 'sourmash prefetch' output # using ::prefetch - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - prefetch_csv = runtmp.output('prefetch-out.csv') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + prefetch_csv = runtmp.output("prefetch-out.csv") - runtmp.sourmash('prefetch', metag_sig, *gcf_sigs, - '-k', '21', '-o', prefetch_csv) + runtmp.sourmash("prefetch", metag_sig, *gcf_sigs, "-k", "21", "-o", prefetch_csv) err = runtmp.last_result.err print(err) @@ -6007,12 +7236,22 @@ def test_gather_with_prefetch_picklist_2_prefetch(runtmp, linear_gather): print(out) assert "total of 12 matching signatures." in err - assert "of 1466 distinct query hashes, 1466 were found in matches above threshold." in err + assert ( + "of 1466 distinct query hashes, 1466 were found in matches above threshold." + in err + ) # now, do a gather with the results - runtmp.sourmash('gather', metag_sig, *gcf_sigs, linear_gather, - '-k', '21', '--picklist', - f'{prefetch_csv}::prefetch') + runtmp.sourmash( + "gather", + metag_sig, + *gcf_sigs, + linear_gather, + "-k", + "21", + "--picklist", + f"{prefetch_csv}::prefetch", + ) err = runtmp.last_result.err print(err) @@ -6031,12 +7270,11 @@ def test_gather_with_prefetch_picklist_3_gather(runtmp, linear_gather): # test 'gather' using a picklist taken from 'sourmash gather' output, # using ::gather. # (this doesn't really do anything useful, but it's an ok test :) - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - gather_csv = runtmp.output('gather-out.csv') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + gather_csv = runtmp.output("gather-out.csv") - runtmp.sourmash('gather', metag_sig, *gcf_sigs, - '-k', '21', '-o', gather_csv) + runtmp.sourmash("gather", metag_sig, *gcf_sigs, "-k", "21", "-o", gather_csv) err = runtmp.last_result.err print(err) @@ -6051,9 +7289,16 @@ def test_gather_with_prefetch_picklist_3_gather(runtmp, linear_gather): assert "1.9 Mbp 13.1% 100.0% NC_000853.1 " in out # now, do another gather with the results - runtmp.sourmash('gather', metag_sig, *gcf_sigs, linear_gather, - '-k', '21', '--picklist', - f'{gather_csv}::gather') + runtmp.sourmash( + "gather", + metag_sig, + *gcf_sigs, + linear_gather, + "-k", + "21", + "--picklist", + f"{gather_csv}::gather", + ) err = runtmp.last_result.err print(err) @@ -6072,12 +7317,11 @@ def test_gather_with_prefetch_picklist_3_gather_badcol(runtmp): # test 'gather' using a picklist taken from 'sourmash gather' output, # using ::gather. 
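    # (The empty column in "::gather" selects the predefined gather picklist
    # format, so the standard gather CSV columns are used without naming an
    # explicit column:coltype pair, unlike e.g. ":match_md5:md5short" above.)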
# (this doesn't really do anything useful, but it's an ok test :) - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - gather_csv = runtmp.output('gather-out.csv') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + gather_csv = runtmp.output("gather-out.csv") - runtmp.sourmash('gather', metag_sig, *gcf_sigs, - '-k', '21', '-o', gather_csv) + runtmp.sourmash("gather", metag_sig, *gcf_sigs, "-k", "21", "-o", gather_csv) err = runtmp.last_result.err print(err) @@ -6094,9 +7338,15 @@ def test_gather_with_prefetch_picklist_3_gather_badcol(runtmp): # now, do another gather with the results, but with a bad picklist # parameter with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('gather', metag_sig, *gcf_sigs, - '-k', '21', '--picklist', - f'{gather_csv}:FOO:gather') + runtmp.sourmash( + "gather", + metag_sig, + *gcf_sigs, + "-k", + "21", + "--picklist", + f"{gather_csv}:FOO:gather", + ) err = runtmp.last_result.err print(err) @@ -6112,11 +7362,11 @@ def test_gather_with_prefetch_picklist_4_manifest(runtmp, linear_gather): # test 'gather' using a picklist taken from 'sourmash sig manifest' # output, using ::manifest. # (this doesn't really do anything useful, but it's an ok test :) - gather_dir = utils.get_test_data('gather/') - metag_sig = utils.get_test_data('gather/combined.sig') - manifest_csv = runtmp.output('manifest.csv') + gather_dir = utils.get_test_data("gather/") + metag_sig = utils.get_test_data("gather/combined.sig") + manifest_csv = runtmp.output("manifest.csv") - runtmp.sourmash('sig', 'manifest', gather_dir, '-o', manifest_csv) + runtmp.sourmash("sig", "manifest", gather_dir, "-o", manifest_csv) err = runtmp.last_result.err print(err) @@ -6125,9 +7375,16 @@ def test_gather_with_prefetch_picklist_4_manifest(runtmp, linear_gather): print(out) # now, do a gather on the manifest - runtmp.sourmash('gather', metag_sig, gather_dir, linear_gather, - '-k', '21', '--picklist', - f'{manifest_csv}::manifest') + runtmp.sourmash( + "gather", + metag_sig, + gather_dir, + linear_gather, + "-k", + "21", + "--picklist", + f"{manifest_csv}::manifest", + ) err = runtmp.last_result.err print(err) @@ -6146,11 +7403,11 @@ def test_gather_with_prefetch_picklist_4_manifest_excl(runtmp, linear_gather): # test 'gather' using a picklist taken from 'sourmash sig manifest' # output, using ::manifest. 
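    # (A manifest written by "sourmash sig manifest" can itself serve as a
    # picklist: "::manifest" selects exactly the signatures it lists, and the
    # ":exclude" suffix used in the next test inverts that selection.)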
# (this doesn't really do anything useful, but it's an ok test :) - gather_dir = utils.get_test_data('gather/') - metag_sig = utils.get_test_data('gather/combined.sig') - manifest_csv = runtmp.output('manifest.csv') + gather_dir = utils.get_test_data("gather/") + metag_sig = utils.get_test_data("gather/combined.sig") + manifest_csv = runtmp.output("manifest.csv") - runtmp.sourmash('sig', 'manifest', gather_dir, '-o', manifest_csv) + runtmp.sourmash("sig", "manifest", gather_dir, "-o", manifest_csv) err = runtmp.last_result.err print(err) @@ -6159,9 +7416,16 @@ def test_gather_with_prefetch_picklist_4_manifest_excl(runtmp, linear_gather): print(out) # now, do a gather on the manifest - runtmp.sourmash('gather', metag_sig, gather_dir, linear_gather, - '-k', '21', '--picklist', - f'{manifest_csv}::manifest:exclude') + runtmp.sourmash( + "gather", + metag_sig, + gather_dir, + linear_gather, + "-k", + "21", + "--picklist", + f"{manifest_csv}::manifest:exclude", + ) err = runtmp.last_result.err print(err) @@ -6176,12 +7440,13 @@ def test_gather_with_prefetch_picklist_4_manifest_excl(runtmp, linear_gather): def test_gather_with_prefetch_picklist_5_search(runtmp): # test 'gather' using a picklist taken from 'sourmash prefetch' output # using ::prefetch - gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig')) - metag_sig = utils.get_test_data('gather/combined.sig') - search_csv = runtmp.output('search-out.csv') + gcf_sigs = glob.glob(utils.get_test_data("gather/GCF*.sig")) + metag_sig = utils.get_test_data("gather/combined.sig") + search_csv = runtmp.output("search-out.csv") - runtmp.sourmash('search', '--containment', metag_sig, *gcf_sigs, - '-k', '21', '-o', search_csv) + runtmp.sourmash( + "search", "--containment", metag_sig, *gcf_sigs, "-k", "21", "-o", search_csv + ) err = runtmp.last_result.err print(err) @@ -6193,9 +7458,15 @@ def test_gather_with_prefetch_picklist_5_search(runtmp): assert " 33.2% NC_003198.1 Salmonella enterica subsp." in out # now, do a gather with the results - runtmp.sourmash('gather', metag_sig, *gcf_sigs, - '-k', '21', '--picklist', - f'{search_csv}::search') + runtmp.sourmash( + "gather", + metag_sig, + *gcf_sigs, + "-k", + "21", + "--picklist", + f"{search_csv}::search", + ) err = runtmp.last_result.err print(err) @@ -6212,17 +7483,17 @@ def test_gather_with_prefetch_picklist_5_search(runtmp): def test_gather_scaled_1(runtmp, linear_gather, prefetch_gather): # test gather on a sig indexed with scaled=1 - inp = utils.get_test_data('short.fa') - outp = runtmp.output('out.sig') + inp = utils.get_test_data("short.fa") + outp = runtmp.output("out.sig") # prepare a signature with a scaled of 1 - runtmp.sourmash('sketch', 'dna', '-p', 'scaled=1,k=31', inp, '-o', outp) + runtmp.sourmash("sketch", "dna", "-p", "scaled=1,k=31", inp, "-o", outp) # run with a low threshold - runtmp.sourmash('gather', outp, outp, '--threshold-bp', '0') + runtmp.sourmash("gather", outp, outp, "--threshold-bp", "0") print(runtmp.last_result.out) - print('---') + print("---") print(runtmp.last_result.err) assert "1.0 kbp 100.0% 100.0%" in runtmp.last_result.out @@ -6231,25 +7502,25 @@ def test_gather_scaled_1(runtmp, linear_gather, prefetch_gather): def test_standalone_manifest_search(runtmp): # test loading/searching a manifest file from the command line. 
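    # (The manifest written below acts as a standalone database for
    # "sourmash search"; as the comment further down notes, it currently has
    # to live inside the top-level directory it describes, presumably so the
    # signature locations it records still resolve.)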
- sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") - dirname = runtmp.output('somedir') + dirname = runtmp.output("somedir") os.mkdir(dirname) - subdir = runtmp.output('somedir/subdir') + subdir = runtmp.output("somedir/subdir") os.mkdir(subdir) - shutil.copyfile(sig47, os.path.join(dirname, '47.fa.sig')) - shutil.copyfile(sig63, os.path.join(subdir, '63.fa.sig')) + shutil.copyfile(sig47, os.path.join(dirname, "47.fa.sig")) + shutil.copyfile(sig63, os.path.join(subdir, "63.fa.sig")) # for now, the output manifest must be within top level dir for # CLI stuff to work properly. - mf = os.path.join(dirname, 'mf.csv') + mf = os.path.join(dirname, "mf.csv") # build manifest... - runtmp.sourmash('sig', 'manifest', dirname, '-o', mf) + runtmp.sourmash("sig", "manifest", dirname, "-o", mf) # ...and now use for a search! - runtmp.sourmash('search', sig47, mf) + runtmp.sourmash("search", sig47, mf) out = runtmp.last_result.out print(out) @@ -6261,95 +7532,100 @@ def test_standalone_manifest_search(runtmp): def test_standalone_manifest_search_fail(runtmp): # test loading/searching a manifest file from the command line; should # fail if manifest is not located within tld. - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") - dirname = runtmp.output('somedir') + dirname = runtmp.output("somedir") os.mkdir(dirname) - subdir = runtmp.output('somedir/subdir') + subdir = runtmp.output("somedir/subdir") os.mkdir(subdir) - shutil.copyfile(sig47, os.path.join(dirname, '47.fa.sig')) - shutil.copyfile(sig63, os.path.join(subdir, '63.fa.sig')) + shutil.copyfile(sig47, os.path.join(dirname, "47.fa.sig")) + shutil.copyfile(sig63, os.path.join(subdir, "63.fa.sig")) # for now, the output manifest must be within top level dir for # CLI stuff to work properly. here we intentionally break this, # for testing purposes. - mf = runtmp.output('mf.csv') + mf = runtmp.output("mf.csv") # build manifest... - runtmp.sourmash('sig', 'manifest', dirname, '-o', mf) + runtmp.sourmash("sig", "manifest", dirname, "-o", mf) # ...and now use for a search! 
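    # Here mf.csv was written outside somedir/, so loading it as a database
    # is expected to fail; presumably the relative locations recorded in the
    # manifest no longer resolve from its new position.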
with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('search', sig47, mf) + runtmp.sourmash("search", sig47, mf) def test_search_ani_jaccard(runtmp): c = runtmp - sig47 = utils.get_test_data('47.fa.sig') - sig4763 = utils.get_test_data('47+63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig4763 = utils.get_test_data("47+63.fa.sig") - c.run_sourmash('search', sig47, sig4763, '-o', 'xxx.csv') + c.run_sourmash("search", sig47, sig4763, "-o", "xxx.csv") print(c.last_result.status, c.last_result.out, c.last_result.err) search_result_names = SearchResult.search_write_cols - csv_file = c.output('xxx.csv') + csv_file = c.output("xxx.csv") with open(csv_file) as fp: reader = csv.DictReader(fp) row = next(reader) print(row) assert search_result_names == list(row.keys()) - assert float(row['similarity']) == 0.6564798376870403 - assert row['filename'].endswith('47+63.fa.sig') - assert row['md5'] == '491c0a81b2cfb0188c0d3b46837c2f42' - assert row['query_filename'].endswith('47.fa') - assert row['query_name'] == 'NC_009665.1 Shewanella baltica OS185, complete genome' - assert row['query_md5'] == '09a08691' - assert row['ani'] == "0.992530907924384" + assert float(row["similarity"]) == 0.6564798376870403 + assert row["filename"].endswith("47+63.fa.sig") + assert row["md5"] == "491c0a81b2cfb0188c0d3b46837c2f42" + assert row["query_filename"].endswith("47.fa") + assert ( + row["query_name"] == "NC_009665.1 Shewanella baltica OS185, complete genome" + ) + assert row["query_md5"] == "09a08691" + assert row["ani"] == "0.992530907924384" def test_search_ani_jaccard_error_too_high(runtmp): c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,scaled=1', testdata1, testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=1", testdata1, testdata2) - c.run_sourmash('search', 'short.fa.sig', 'short2.fa.sig', '-o', 'xxx.csv') + c.run_sourmash("search", "short.fa.sig", "short2.fa.sig", "-o", "xxx.csv") print(c.last_result.status, c.last_result.out, c.last_result.err) search_result_names = SearchResult.search_write_cols - csv_file = c.output('xxx.csv') + csv_file = c.output("xxx.csv") with open(csv_file) as fp: reader = csv.DictReader(fp) row = next(reader) print(row) assert search_result_names == list(row.keys()) - assert float(row['similarity']) == 0.9288577154308617 - assert row['filename'].endswith('short2.fa.sig') - assert row['md5'] == 'bf752903d635b1eb83c53fe4aae951db' - assert row['query_filename'].endswith('short.fa') - assert row['query_name'] == '' - assert row['query_md5'] == '9191284a' - #assert row['ani'] == "0.9987884602947684" - assert row['ani'] == '' + assert float(row["similarity"]) == 0.9288577154308617 + assert row["filename"].endswith("short2.fa.sig") + assert row["md5"] == "bf752903d635b1eb83c53fe4aae951db" + assert row["query_filename"].endswith("short.fa") + assert row["query_name"] == "" + assert row["query_md5"] == "9191284a" + # assert row['ani'] == "0.9987884602947684" + assert row["ani"] == "" - assert "WARNING: Jaccard estimation for at least one of these comparisons is likely inaccurate. Could not estimate ANI for these comparisons." in c.last_result.err + assert ( + "WARNING: Jaccard estimation for at least one of these comparisons is likely inaccurate. Could not estimate ANI for these comparisons." 
+ in c.last_result.err + ) def test_searchabund_no_ani(runtmp): c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,scaled=10,abund', testdata1, testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=10,abund", testdata1, testdata2) - c.run_sourmash('search', 'short.fa.sig', 'short2.fa.sig', '-o', 'xxx.csv') + c.run_sourmash("search", "short.fa.sig", "short2.fa.sig", "-o", "xxx.csv") print(c.last_result.status, c.last_result.out, c.last_result.err) - csv_file = c.output('xxx.csv') + csv_file = c.output("xxx.csv") search_result_names = SearchResult.search_write_cols with open(csv_file) as fp: @@ -6357,158 +7633,178 @@ def test_searchabund_no_ani(runtmp): row = next(reader) print(row) assert search_result_names == list(row.keys()) - assert float(row['similarity']) == 0.8224046424612483 - assert row['md5'] == 'c9d5a795eeaaf58e286fb299133e1938' - assert row['filename'].endswith('short2.fa.sig') - assert row['query_filename'].endswith('short.fa') - assert row['query_name'] == '' - assert row['query_md5'] == 'b5cc464c' - assert row['ani'] == "" # do we want empty column to appear?? + assert float(row["similarity"]) == 0.8224046424612483 + assert row["md5"] == "c9d5a795eeaaf58e286fb299133e1938" + assert row["filename"].endswith("short2.fa.sig") + assert row["query_filename"].endswith("short.fa") + assert row["query_name"] == "" + assert row["query_md5"] == "b5cc464c" + assert row["ani"] == "" # do we want empty column to appear?? def test_search_ani_containment(runtmp): c = runtmp - testdata1 = utils.get_test_data('2+63.fa.sig') - testdata2 = utils.get_test_data('47+63.fa.sig') + testdata1 = utils.get_test_data("2+63.fa.sig") + testdata2 = utils.get_test_data("47+63.fa.sig") - c.run_sourmash('search', '--containment', testdata1, testdata2, '-o', 'xxx.csv') + c.run_sourmash("search", "--containment", testdata1, testdata2, "-o", "xxx.csv") print(c.last_result.status, c.last_result.out, c.last_result.err) search_result_names = SearchResult.search_write_cols - csv_file = c.output('xxx.csv') + csv_file = c.output("xxx.csv") with open(csv_file) as fp: reader = csv.DictReader(fp) row = next(reader) print(row) assert search_result_names == list(row.keys()) - assert float(row['similarity']) == 0.6597808288197506 - assert row['filename'].endswith('47+63.fa.sig') - assert row['md5'] == '491c0a81b2cfb0188c0d3b46837c2f42' - assert row['query_name'] == '' - assert row['query_md5'] == '832a45e8' - assert row['ani'] == "0.9866751346467802" + assert float(row["similarity"]) == 0.6597808288197506 + assert row["filename"].endswith("47+63.fa.sig") + assert row["md5"] == "491c0a81b2cfb0188c0d3b46837c2f42" + assert row["query_name"] == "" + assert row["query_md5"] == "832a45e8" + assert row["ani"] == "0.9866751346467802" # search other direction - c.run_sourmash('search', '--containment', testdata2, testdata1, '-o', 'xxxx.csv') + c.run_sourmash("search", "--containment", testdata2, testdata1, "-o", "xxxx.csv") print(c.last_result.status, c.last_result.out, c.last_result.err) - csv_file = c.output('xxxx.csv') + csv_file = c.output("xxxx.csv") with open(csv_file) as fp: reader = csv.DictReader(fp) row = next(reader) print(row) assert search_result_names == list(row.keys()) - assert float(row['similarity']) == 0.6642150646715699 - assert row['filename'].endswith('2+63.fa.sig') - assert row['md5'] == 
'832a45e85bdca6eaef5d73047e3e6321' - assert row['query_name'] == '' - assert row['query_md5'] == '491c0a81' - assert row['ani'] == "0.9868883523107224" + assert float(row["similarity"]) == 0.6642150646715699 + assert row["filename"].endswith("2+63.fa.sig") + assert row["md5"] == "832a45e85bdca6eaef5d73047e3e6321" + assert row["query_name"] == "" + assert row["query_md5"] == "491c0a81" + assert row["ani"] == "0.9868883523107224" def test_search_ani_containment_asymmetry(runtmp): # test contained_by asymmetries, viz #2215 - query_sig = utils.get_test_data('47.fa.sig') - merged_sig = utils.get_test_data('47-63-merge.sig') + query_sig = utils.get_test_data("47.fa.sig") + merged_sig = utils.get_test_data("47-63-merge.sig") - runtmp.sourmash('search', query_sig, merged_sig, '-o', - 'query-in-merged.csv', '--containment') - runtmp.sourmash('search', merged_sig, query_sig, '-o', - 'merged-in-query.csv', '--containment') + runtmp.sourmash( + "search", query_sig, merged_sig, "-o", "query-in-merged.csv", "--containment" + ) + runtmp.sourmash( + "search", merged_sig, query_sig, "-o", "merged-in-query.csv", "--containment" + ) - with sourmash_args.FileInputCSV(runtmp.output('query-in-merged.csv')) as r: + with sourmash_args.FileInputCSV(runtmp.output("query-in-merged.csv")) as r: query_in_merged = list(r)[0] - with sourmash_args.FileInputCSV(runtmp.output('merged-in-query.csv')) as r: + with sourmash_args.FileInputCSV(runtmp.output("merged-in-query.csv")) as r: merged_in_query = list(r)[0] - assert query_in_merged['ani'] == '1.0' - assert merged_in_query['ani'] == '0.9865155060423993' + assert query_in_merged["ani"] == "1.0" + assert merged_in_query["ani"] == "0.9865155060423993" def test_search_ani_containment_fail(runtmp): c = runtmp - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,scaled=10', testdata1, testdata2) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=10", testdata1, testdata2) - c.run_sourmash('search', '--containment', 'short.fa.sig', 'short2.fa.sig', '-o', 'xxx.csv') + c.run_sourmash( + "search", "--containment", "short.fa.sig", "short2.fa.sig", "-o", "xxx.csv" + ) print(c.last_result.status, c.last_result.out, c.last_result.err) search_result_names = SearchResult.search_write_cols - csv_file = c.output('xxx.csv') + csv_file = c.output("xxx.csv") with open(csv_file) as fp: reader = csv.DictReader(fp) row = next(reader) print(row) assert search_result_names == list(row.keys()) - assert round(float(row['similarity']), 3) == 0.967 - assert row['ani'] == "0.998906999319701" + assert round(float(row["similarity"]), 3) == 0.967 + assert row["ani"] == "0.998906999319701" # With PR #2268, this error message should not appear - #assert "WARNING: size estimation for at least one of these sketches may be inaccurate. ANI values will not be reported for these comparisons." in c.last_result.err - + # assert "WARNING: size estimation for at least one of these sketches may be inaccurate. ANI values will not be reported for these comparisons." 
in c.last_result.err + def test_search_ani_containment_estimate_ci(runtmp): # test ANI confidence intervals, based on (asymmetric) containment c = runtmp - testdata1 = utils.get_test_data('2+63.fa.sig') - testdata2 = utils.get_test_data('47+63.fa.sig') - - c.run_sourmash('search', '--containment', testdata1, testdata2, '-o', 'xxx.csv', '--estimate-ani-ci') + testdata1 = utils.get_test_data("2+63.fa.sig") + testdata2 = utils.get_test_data("47+63.fa.sig") + + c.run_sourmash( + "search", + "--containment", + testdata1, + testdata2, + "-o", + "xxx.csv", + "--estimate-ani-ci", + ) print(c.last_result.status, c.last_result.out, c.last_result.err) search_result_names_ci = SearchResult.search_write_cols_ci - csv_file = c.output('xxx.csv') + csv_file = c.output("xxx.csv") with open(csv_file) as fp: reader = csv.DictReader(fp) row = next(reader) print(row) assert search_result_names_ci == list(row.keys()) - assert float(row['similarity']) == 0.6597808288197506 - assert row['filename'].endswith('47+63.fa.sig') - assert row['md5'] == '491c0a81b2cfb0188c0d3b46837c2f42' - assert row['query_name'] == '' - assert row['query_md5'] == '832a45e8' - assert row['ani'] == "0.9866751346467802" - assert row['ani_low'] == "0.9861576758035308" #"0.9861559138341189" - assert row['ani_high'] == "0.9871770716451368" #"0.9871787293232042" + assert float(row["similarity"]) == 0.6597808288197506 + assert row["filename"].endswith("47+63.fa.sig") + assert row["md5"] == "491c0a81b2cfb0188c0d3b46837c2f42" + assert row["query_name"] == "" + assert row["query_md5"] == "832a45e8" + assert row["ani"] == "0.9866751346467802" + assert row["ani_low"] == "0.9861576758035308" # "0.9861559138341189" + assert row["ani_high"] == "0.9871770716451368" # "0.9871787293232042" # search other direction - c.run_sourmash('search', '--containment', testdata2, testdata1, '-o', 'xxxx.csv', '--estimate-ani-ci') + c.run_sourmash( + "search", + "--containment", + testdata2, + testdata1, + "-o", + "xxxx.csv", + "--estimate-ani-ci", + ) print(c.last_result.status, c.last_result.out, c.last_result.err) - csv_file = c.output('xxxx.csv') + csv_file = c.output("xxxx.csv") with open(csv_file) as fp: reader = csv.DictReader(fp) row = next(reader) print(row) assert search_result_names_ci == list(row.keys()) - assert float(row['similarity']) == 0.6642150646715699 - assert row['filename'].endswith('2+63.fa.sig') - assert row['md5'] == '832a45e85bdca6eaef5d73047e3e6321' - assert row['query_name'] == '' - assert row['query_md5'] == '491c0a81' - assert row['ani'] == "0.9868883523107224" - assert row['ani_low'] == "0.986374049720872" #"0.9863757952722036" - assert row['ani_high'] == "0.9873870188726516" #"0.9873853776786775" + assert float(row["similarity"]) == 0.6642150646715699 + assert row["filename"].endswith("2+63.fa.sig") + assert row["md5"] == "832a45e85bdca6eaef5d73047e3e6321" + assert row["query_name"] == "" + assert row["query_md5"] == "491c0a81" + assert row["ani"] == "0.9868883523107224" + assert row["ani_low"] == "0.986374049720872" # "0.9863757952722036" + assert row["ani_high"] == "0.9873870188726516" # "0.9873853776786775" def test_search_ani_max_containment(runtmp): c = runtmp - testdata1 = utils.get_test_data('2+63.fa.sig') - testdata2 = utils.get_test_data('47+63.fa.sig') + testdata1 = utils.get_test_data("2+63.fa.sig") + testdata2 = utils.get_test_data("47+63.fa.sig") - c.run_sourmash('search', '--max-containment', testdata1, testdata2, '-o', 'xxx.csv') + c.run_sourmash("search", "--max-containment", testdata1, testdata2, "-o", "xxx.csv") 
print(c.last_result.status, c.last_result.out, c.last_result.err) - csv_file = c.output('xxx.csv') + csv_file = c.output("xxx.csv") search_result_names = SearchResult.search_write_cols with open(csv_file) as fp: @@ -6516,25 +7812,33 @@ def test_search_ani_max_containment(runtmp): row = next(reader) print(row) assert search_result_names == list(row.keys()) - assert float(row['similarity']) == 0.6642150646715699 - assert row['filename'].endswith('47+63.fa.sig') - assert row['md5'] == '491c0a81b2cfb0188c0d3b46837c2f42' - assert row['query_name'] == '' - assert row['query_md5'] == '832a45e8' - assert row['ani'] == "0.9868883523107224" + assert float(row["similarity"]) == 0.6642150646715699 + assert row["filename"].endswith("47+63.fa.sig") + assert row["md5"] == "491c0a81b2cfb0188c0d3b46837c2f42" + assert row["query_name"] == "" + assert row["query_md5"] == "832a45e8" + assert row["ani"] == "0.9868883523107224" def test_search_ani_max_containment_estimate_ci(runtmp): # test ANI confidence intervals, based on (symmetric) max-containment c = runtmp - testdata1 = utils.get_test_data('2+63.fa.sig') - testdata2 = utils.get_test_data('47+63.fa.sig') - - c.run_sourmash('search', '--max-containment', testdata1, testdata2, '-o', 'xxx.csv', '--estimate-ani-ci') + testdata1 = utils.get_test_data("2+63.fa.sig") + testdata2 = utils.get_test_data("47+63.fa.sig") + + c.run_sourmash( + "search", + "--max-containment", + testdata1, + testdata2, + "-o", + "xxx.csv", + "--estimate-ani-ci", + ) print(c.last_result.status, c.last_result.out, c.last_result.err) - csv_file = c.output('xxx.csv') + csv_file = c.output("xxx.csv") search_result_names_ci = SearchResult.search_write_cols_ci with open(csv_file) as fp: @@ -6542,32 +7846,32 @@ def test_search_ani_max_containment_estimate_ci(runtmp): row = next(reader) print(row) assert search_result_names_ci == list(row.keys()) - assert float(row['similarity']) == 0.6642150646715699 - assert row['filename'].endswith('47+63.fa.sig') - assert row['md5'] == '491c0a81b2cfb0188c0d3b46837c2f42' - assert row['query_name'] == '' - assert row['query_md5'] == '832a45e8' - assert row['ani'] == "0.9868883523107224" - assert row['ani_low'] == "0.986374049720872" - assert row['ani_high'] == "0.9873870188726516" + assert float(row["similarity"]) == 0.6642150646715699 + assert row["filename"].endswith("47+63.fa.sig") + assert row["md5"] == "491c0a81b2cfb0188c0d3b46837c2f42" + assert row["query_name"] == "" + assert row["query_md5"] == "832a45e8" + assert row["ani"] == "0.9868883523107224" + assert row["ani_low"] == "0.986374049720872" + assert row["ani_high"] == "0.9873870188726516" def test_search_jaccard_ani_downsample(runtmp): c = runtmp - sig47 = utils.get_test_data('47.fa.sig') - sig4763 = utils.get_test_data('47+63.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") + sig4763 = utils.get_test_data("47+63.fa.sig") ss47 = sourmash.load_one_signature(sig47) ss4763 = sourmash.load_one_signature(sig4763) print(f"SCALED: sig1: {ss47.minhash.scaled}, sig2: {ss4763.minhash.scaled}") - c.run_sourmash('search', sig47, sig4763, '-o', 'xxx.csv') + c.run_sourmash("search", sig47, sig4763, "-o", "xxx.csv") print(c.last_result.status, c.last_result.out, c.last_result.err) search_result_names = SearchResult.search_write_cols search_result_names_ci = SearchResult.search_write_cols_ci - csv_file = c.output('xxx.csv') + csv_file = c.output("xxx.csv") with open(csv_file) as fp: reader = csv.DictReader(fp) @@ -6575,50 +7879,61 @@ def test_search_jaccard_ani_downsample(runtmp): print(row) assert 
search_result_names == list(row.keys()) assert search_result_names_ci != list(row.keys()) - assert float(row['similarity']) == 0.6564798376870403 - assert row['filename'].endswith('47+63.fa.sig') - assert row['md5'] == '491c0a81b2cfb0188c0d3b46837c2f42' - assert row['query_filename'].endswith('47.fa') - assert row['query_name'] == 'NC_009665.1 Shewanella baltica OS185, complete genome' - assert row['query_md5'] == '09a08691' - assert row['ani'] == "0.992530907924384" + assert float(row["similarity"]) == 0.6564798376870403 + assert row["filename"].endswith("47+63.fa.sig") + assert row["md5"] == "491c0a81b2cfb0188c0d3b46837c2f42" + assert row["query_filename"].endswith("47.fa") + assert ( + row["query_name"] == "NC_009665.1 Shewanella baltica OS185, complete genome" + ) + assert row["query_md5"] == "09a08691" + assert row["ani"] == "0.992530907924384" # downsample one and check similarity and ANI ds_sig47 = c.output("ds_sig47.sig") - c.run_sourmash('sig', "downsample", sig47, "--scaled", "2000", '-o', ds_sig47) - c.run_sourmash('search', ds_sig47, sig4763, '-o', 'xxx.csv') -# - csv_file = c.output('xxx.csv') + c.run_sourmash("sig", "downsample", sig47, "--scaled", "2000", "-o", ds_sig47) + c.run_sourmash("search", ds_sig47, sig4763, "-o", "xxx.csv") + # + csv_file = c.output("xxx.csv") with open(csv_file) as fp: reader = csv.DictReader(fp) row = next(reader) print(row) - assert round(float(row['similarity']), 3) == round(0.6634517766497462, 3) - assert round(float(row['ani']), 3) == 0.993 + assert round(float(row["similarity"]), 3) == round(0.6634517766497462, 3) + assert round(float(row["ani"]), 3) == 0.993 - #downsample manually and assert same ANI + # downsample manually and assert same ANI ss47_ds = signature.load_one_signature(ds_sig47) print("SCALED:", ss47_ds.minhash.scaled, ss4763.minhash.scaled) ani_info = ss47_ds.jaccard_ani(ss4763, downsample=True) print(ani_info) - assert round(ani_info.ani,3) == 0.993 + assert round(ani_info.ani, 3) == 0.993 assert (1 - round(ani_info.dist, 3)) == 0.993 def test_gather_ani_csv(runtmp, linear_gather, prefetch_gather): - testdata1 = utils.get_test_data('63.fa.sig') - testdata2 = utils.get_test_data('47+63.fa.sig') + testdata1 = utils.get_test_data("63.fa.sig") + testdata2 = utils.get_test_data("47+63.fa.sig") - runtmp.sourmash('index', '-k', '31', 'zzz', testdata2) + runtmp.sourmash("index", "-k", "31", "zzz", testdata2) - assert os.path.exists(runtmp.output('zzz.sbt.zip')) + assert os.path.exists(runtmp.output("zzz.sbt.zip")) - runtmp.sourmash('gather', testdata1, 'zzz', '-o', 'foo.csv', '--threshold-bp=1', linear_gather, prefetch_gather) + runtmp.sourmash( + "gather", + testdata1, + "zzz", + "-o", + "foo.csv", + "--threshold-bp=1", + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - csv_file = runtmp.output('foo.csv') + csv_file = runtmp.output("foo.csv") gather_result_names = GatherResult.gather_write_cols gather_result_names_ci = GatherResult.gather_write_cols_ci @@ -6628,42 +7943,63 @@ def test_gather_ani_csv(runtmp, linear_gather, prefetch_gather): print(row) assert gather_result_names == list(row.keys()) assert gather_result_names_ci != list(row.keys()) - assert float(row['intersect_bp']) == 5238000.0 - assert float(row['unique_intersect_bp']) == 5238000.0 - assert float(row['remaining_bp']) == 0.0 - assert float(row['f_orig_query']) == 1.0 - assert float(row['f_unique_to_query']) == 1.0 - assert float(row['f_match']) == 0.6642150646715699 - assert row['filename'] == 'zzz' - assert 
row['md5'] == '491c0a81b2cfb0188c0d3b46837c2f42' - assert row['gather_result_rank'] == '0' - assert row['query_md5'] == '38729c63' - assert row['query_bp'] == '5238000' - assert row['query_containment_ani']== '1.0' - assert round(float(row['match_containment_ani']), 3) == 0.987 - assert round(float(row['average_containment_ani']), 3) == 0.993 - assert round(float(row['max_containment_ani']),3) == 1.0 - assert row['potential_false_negative'] == 'False' + assert float(row["intersect_bp"]) == 5238000.0 + assert float(row["unique_intersect_bp"]) == 5238000.0 + assert float(row["remaining_bp"]) == 0.0 + assert float(row["f_orig_query"]) == 1.0 + assert float(row["f_unique_to_query"]) == 1.0 + assert float(row["f_match"]) == 0.6642150646715699 + assert row["filename"] == "zzz" + assert row["md5"] == "491c0a81b2cfb0188c0d3b46837c2f42" + assert row["gather_result_rank"] == "0" + assert row["query_md5"] == "38729c63" + assert row["query_bp"] == "5238000" + assert row["query_containment_ani"] == "1.0" + assert round(float(row["match_containment_ani"]), 3) == 0.987 + assert round(float(row["average_containment_ani"]), 3) == 0.993 + assert round(float(row["max_containment_ani"]), 3) == 1.0 + assert row["potential_false_negative"] == "False" def test_gather_ani_csv_estimate_ci(runtmp, linear_gather, prefetch_gather): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - - runtmp.sourmash('sketch','dna','-p','scaled=10', '--name-from-first', testdata1, testdata2) - - runtmp.sourmash('sketch','dna','-p','scaled=10', '-o', 'query.fa.sig', '--name-from-first', testdata2) - - runtmp.sourmash('index', '-k', '31', 'zzz', 'short.fa.sig', 'short2.fa.sig') - - assert os.path.exists(runtmp.output('zzz.sbt.zip')) - - runtmp.sourmash('gather', 'query.fa.sig', 'zzz', '-o', 'foo.csv', '--threshold-bp=1', '--estimate-ani-ci', linear_gather, prefetch_gather) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + + runtmp.sourmash( + "sketch", "dna", "-p", "scaled=10", "--name-from-first", testdata1, testdata2 + ) + + runtmp.sourmash( + "sketch", + "dna", + "-p", + "scaled=10", + "-o", + "query.fa.sig", + "--name-from-first", + testdata2, + ) + + runtmp.sourmash("index", "-k", "31", "zzz", "short.fa.sig", "short2.fa.sig") + + assert os.path.exists(runtmp.output("zzz.sbt.zip")) + + runtmp.sourmash( + "gather", + "query.fa.sig", + "zzz", + "-o", + "foo.csv", + "--threshold-bp=1", + "--estimate-ani-ci", + linear_gather, + prefetch_gather, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - csv_file = runtmp.output('foo.csv') + csv_file = runtmp.output("foo.csv") gather_result_names = GatherResult.gather_write_cols_ci @@ -6672,29 +8008,29 @@ def test_gather_ani_csv_estimate_ci(runtmp, linear_gather, prefetch_gather): row = next(reader) print(row) assert gather_result_names == list(row.keys()) - assert float(row['intersect_bp']) == 910 - assert float(row['unique_intersect_bp']) == 910 - assert float(row['remaining_bp']) == 0 - assert float(row['f_orig_query']) == 1.0 - assert float(row['f_unique_to_query']) == 1.0 - assert float(row['f_match']) == 1.0 - assert row['filename'] == 'zzz' - assert row['name'] == 'tr1 4' - assert row['md5'] == 'c9d5a795eeaaf58e286fb299133e1938' - assert row['gather_result_rank'] == '0' - assert row['query_filename'].endswith('short2.fa') - assert row['query_name'] == 'tr1 4' - assert row['query_md5'] == 'c9d5a795' - assert row['query_bp'] == '910' - assert row['query_containment_ani'] == '1.0' - assert 
row['query_containment_ani_low'] == '1.0' - assert row['query_containment_ani_high'] == '1.0' - assert row['match_containment_ani'] == '1.0' - assert row['match_containment_ani_low'] == '1.0' - assert row['match_containment_ani_high'] == '1.0' - assert row['average_containment_ani'] == '1.0' - assert row['max_containment_ani'] == '1.0' - assert row['potential_false_negative'] == 'False' + assert float(row["intersect_bp"]) == 910 + assert float(row["unique_intersect_bp"]) == 910 + assert float(row["remaining_bp"]) == 0 + assert float(row["f_orig_query"]) == 1.0 + assert float(row["f_unique_to_query"]) == 1.0 + assert float(row["f_match"]) == 1.0 + assert row["filename"] == "zzz" + assert row["name"] == "tr1 4" + assert row["md5"] == "c9d5a795eeaaf58e286fb299133e1938" + assert row["gather_result_rank"] == "0" + assert row["query_filename"].endswith("short2.fa") + assert row["query_name"] == "tr1 4" + assert row["query_md5"] == "c9d5a795" + assert row["query_bp"] == "910" + assert row["query_containment_ani"] == "1.0" + assert row["query_containment_ani_low"] == "1.0" + assert row["query_containment_ani_high"] == "1.0" + assert row["match_containment_ani"] == "1.0" + assert row["match_containment_ani_low"] == "1.0" + assert row["match_containment_ani_high"] == "1.0" + assert row["average_containment_ani"] == "1.0" + assert row["max_containment_ani"] == "1.0" + assert row["potential_false_negative"] == "False" def test_compare_containment_ani(runtmp): @@ -6704,12 +8040,21 @@ def test_compare_containment_ani(runtmp): sigfiles = ["2.fa.sig", "2+63.fa.sig", "47.fa.sig", "63.fa.sig"] testdata_sigs = [utils.get_test_data(c) for c in sigfiles] - c.run_sourmash('compare', '--containment', '-k', '31', - '--ani', '--csv', 'output.csv', *testdata_sigs) + c.run_sourmash( + "compare", + "--containment", + "-k", + "31", + "--ani", + "--csv", + "output.csv", + *testdata_sigs, + ) # load the matrix output - mat, idx_to_sig = _load_compare_matrix_and_sigs(c.output('output.csv'), - testdata_sigs) + mat, idx_to_sig = _load_compare_matrix_and_sigs( + c.output("output.csv"), testdata_sigs + ) # check explicit containment against output of compare for i in range(len(idx_to_sig)): @@ -6728,11 +8073,14 @@ def test_compare_containment_ani(runtmp): containment_ani = 0.0 mat_val = round(mat[i][j], 3) - assert containment_ani == mat_val #, (i, j) + assert containment_ani == mat_val # , (i, j) print(c.last_result.err) print(c.last_result.out) - assert "WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this." in c.last_result.err + assert ( + "WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this." 
+ in c.last_result.err + ) def test_compare_containment_ani_asymmetry(runtmp): @@ -6744,11 +8092,19 @@ def test_compare_containment_ani_asymmetry(runtmp): sigfiles = ["47.fa.sig", "47-63-merge.sig"] testdata_sigs = [utils.get_test_data(c) for c in sigfiles] - c.run_sourmash('compare', '--containment', '-k', '31', - '--ani', '--csv', 'output.csv', *testdata_sigs) + c.run_sourmash( + "compare", + "--containment", + "-k", + "31", + "--ani", + "--csv", + "output.csv", + *testdata_sigs, + ) # load the matrix output of compare --containment --estimate-ani - with open(c.output('output.csv'), 'rt') as fp: + with open(c.output("output.csv")) as fp: r = iter(csv.reader(fp)) headers = next(r) @@ -6760,7 +8116,7 @@ def test_compare_containment_ani_asymmetry(runtmp): print(mat) # load in all the input signatures - idx_to_sig = dict() + idx_to_sig = {} for idx, filename in enumerate(testdata_sigs): ss = sourmash.load_one_signature(filename, ksize=31) idx_to_sig[idx] = ss @@ -6782,7 +8138,7 @@ def test_compare_containment_ani_asymmetry(runtmp): containment_ani = 0.0 mat_val = round(mat[i][j], 6) - assert containment_ani == mat_val #, (i, j) + assert containment_ani == mat_val # , (i, j) print(c.last_result.err) print(c.last_result.out) @@ -6794,12 +8150,21 @@ def test_compare_jaccard_ani(runtmp): sigfiles = ["47.fa.sig", "47-63-merge.sig"] testdata_sigs = [utils.get_test_data(c) for c in sigfiles] - c.run_sourmash('compare', '--containment', '-k', '31', - '--ani', '--csv', 'output.csv', *testdata_sigs) + c.run_sourmash( + "compare", + "--containment", + "-k", + "31", + "--ani", + "--csv", + "output.csv", + *testdata_sigs, + ) # load the matrix output - mat, idx_to_sig = _load_compare_matrix_and_sigs(c.output('output.csv'), - testdata_sigs) + mat, idx_to_sig = _load_compare_matrix_and_sigs( + c.output("output.csv"), testdata_sigs + ) # check explicit containment against output of compare for i in range(len(idx_to_sig)): @@ -6818,7 +8183,7 @@ def test_compare_jaccard_ani(runtmp): containment_ani = 0.0 mat_val = round(mat[i][j], 6) - assert containment_ani == mat_val #, (i, j) + assert containment_ani == mat_val # , (i, j) print(c.last_result.err) print(c.last_result.out) @@ -6831,8 +8196,7 @@ def test_compare_jaccard_protein_parallel_ani_bug(runtmp): sigfile = utils.get_test_data("prot/protein.zip") - c.run_sourmash('compare', '--ani', '-p', '2', '--csv', 'output.csv', - sigfile) + c.run_sourmash("compare", "--ani", "-p", "2", "--csv", "output.csv", sigfile) print(c.last_result.err) print(c.last_result.out) @@ -6846,12 +8210,22 @@ def test_compare_containment_ani_asymmetry_distance(runtmp): sigfiles = ["47.fa.sig", "47-63-merge.sig"] testdata_sigs = [utils.get_test_data(c) for c in sigfiles] - c.run_sourmash('compare', '--containment', '-k', '31', '--distance-matrix', - '--ani', '--csv', 'output.csv', *testdata_sigs) + c.run_sourmash( + "compare", + "--containment", + "-k", + "31", + "--distance-matrix", + "--ani", + "--csv", + "output.csv", + *testdata_sigs, + ) # load the matrix output - mat, idx_to_sig = _load_compare_matrix_and_sigs(c.output('output.csv'), - testdata_sigs) + mat, idx_to_sig = _load_compare_matrix_and_sigs( + c.output("output.csv"), testdata_sigs + ) # check explicit containment against output of compare for i in range(len(idx_to_sig)): @@ -6870,7 +8244,7 @@ def test_compare_containment_ani_asymmetry_distance(runtmp): containment_ani = 1 mat_val = round(mat[i][j], 6) - assert containment_ani == mat_val #, (i, j) + assert containment_ani == mat_val # , (i, j) print(c.last_result.err) 
print(c.last_result.out) @@ -6882,12 +8256,14 @@ def test_compare_jaccard_ani(runtmp): sigfiles = ["2.fa.sig", "2+63.fa.sig", "47.fa.sig", "63.fa.sig"] testdata_sigs = [utils.get_test_data(c) for c in sigfiles] - c.run_sourmash('compare', '-k', '31', '--estimate-ani', - '--csv', 'output.csv', *testdata_sigs) + c.run_sourmash( + "compare", "-k", "31", "--estimate-ani", "--csv", "output.csv", *testdata_sigs + ) # load the matrix output - mat, idx_to_sig = _load_compare_matrix_and_sigs(c.output('output.csv'), - testdata_sigs) + mat, idx_to_sig = _load_compare_matrix_and_sigs( + c.output("output.csv"), testdata_sigs + ) # check explicit calculations against output of compare for i in range(len(idx_to_sig)): @@ -6906,30 +8282,43 @@ def test_compare_jaccard_ani(runtmp): jaccard_ani = 0.0 print(jaccard_ani) - assert jaccard_ani == mat_val #, (i, j) + assert jaccard_ani == mat_val # , (i, j) print(c.last_result.err) print(c.last_result.out) - assert "WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this." in c.last_result.err + assert ( + "WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this." + in c.last_result.err + ) def test_compare_jaccard_ani_jaccard_error_too_high(runtmp): c = runtmp - testdata1 = utils.get_test_data('short.fa') - sig1 = c.output('short.fa.sig') - testdata2 = utils.get_test_data('short2.fa') - sig2 = c.output('short2.fa.sig') - c.run_sourmash('sketch', 'dna', '-p', 'k=31,scaled=1', '-o', sig1, testdata1) - c.run_sourmash('sketch', 'dna', '-p', 'k=31,scaled=1', '-o', sig2, testdata2) + testdata1 = utils.get_test_data("short.fa") + sig1 = c.output("short.fa.sig") + testdata2 = utils.get_test_data("short2.fa") + sig2 = c.output("short2.fa.sig") + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=1", "-o", sig1, testdata1) + c.run_sourmash("sketch", "dna", "-p", "k=31,scaled=1", "-o", sig2, testdata2) testdata_sigs = [sig1, sig2] - c.run_sourmash('compare', '-k', '31', '--estimate-ani', '--csv', 'output.csv', 'short.fa.sig', 'short2.fa.sig') + c.run_sourmash( + "compare", + "-k", + "31", + "--estimate-ani", + "--csv", + "output.csv", + "short.fa.sig", + "short2.fa.sig", + ) print(c.last_result.status, c.last_result.out, c.last_result.err) # load the matrix output - mat, idx_to_sig = _load_compare_matrix_and_sigs(c.output('output.csv'), - testdata_sigs) + mat, idx_to_sig = _load_compare_matrix_and_sigs( + c.output("output.csv"), testdata_sigs + ) # check explicit containment against output of compare for i in range(len(idx_to_sig)): @@ -6948,10 +8337,12 @@ def test_compare_jaccard_ani_jaccard_error_too_high(runtmp): jaccard_ani = 0.0 print(jaccard_ani) - assert jaccard_ani == mat_val #, (i, j) + assert jaccard_ani == mat_val # , (i, j) - - assert "WARNING: Jaccard estimation for at least one of these comparisons is likely inaccurate. Could not estimate ANI for these comparisons." in c.last_result.err + assert ( + "WARNING: Jaccard estimation for at least one of these comparisons is likely inaccurate. Could not estimate ANI for these comparisons." 
+ in c.last_result.err + ) def test_compare_max_containment_ani(runtmp): @@ -6960,12 +8351,21 @@ def test_compare_max_containment_ani(runtmp): sigfiles = ["2.fa.sig", "2+63.fa.sig", "47.fa.sig", "63.fa.sig"] testdata_sigs = [utils.get_test_data(c) for c in sigfiles] - c.run_sourmash('compare', '--max-containment', '-k', '31', - '--estimate-ani', '--csv', 'output.csv', *testdata_sigs) + c.run_sourmash( + "compare", + "--max-containment", + "-k", + "31", + "--estimate-ani", + "--csv", + "output.csv", + *testdata_sigs, + ) # load the matrix output - mat, idx_to_sig = _load_compare_matrix_and_sigs(c.output('output.csv'), - testdata_sigs) + mat, idx_to_sig = _load_compare_matrix_and_sigs( + c.output("output.csv"), testdata_sigs + ) # check explicit containment against output of compare for i in range(len(idx_to_sig)): @@ -6987,7 +8387,10 @@ def test_compare_max_containment_ani(runtmp): print(c.last_result.err) print(c.last_result.out) - assert "WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this." in c.last_result.err + assert ( + "WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this." + in c.last_result.err + ) def test_compare_avg_containment_ani(runtmp): @@ -6997,12 +8400,21 @@ def test_compare_avg_containment_ani(runtmp): sigfiles = ["2.fa.sig", "2+63.fa.sig", "47.fa.sig", "63.fa.sig"] testdata_sigs = [utils.get_test_data(c) for c in sigfiles] - c.run_sourmash('compare', '--avg-containment', '-k', '31', - '--estimate-ani', '--csv', 'output.csv', *testdata_sigs) + c.run_sourmash( + "compare", + "--avg-containment", + "-k", + "31", + "--estimate-ani", + "--csv", + "output.csv", + *testdata_sigs, + ) # load the matrix output - mat, idx_to_sig = _load_compare_matrix_and_sigs(c.output('output.csv'), - testdata_sigs) + mat, idx_to_sig = _load_compare_matrix_and_sigs( + c.output("output.csv"), testdata_sigs + ) # check explicit avg containment against output of compare for i in range(len(idx_to_sig)): @@ -7024,29 +8436,40 @@ def test_compare_avg_containment_ani(runtmp): print(c.last_result.err) print(c.last_result.out) - assert "WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this." in c.last_result.err + assert ( + "WARNING: Some of these sketches may have no hashes in common based on chance alone (false negatives). Consider decreasing your scaled value to prevent this." 
+ in c.last_result.err + ) def test_compare_ANI_require_scaled(runtmp): # check that compare with containment requires scaled sketches c = runtmp - s47 = utils.get_test_data('num/47.fa.sig') - s63 = utils.get_test_data('num/63.fa.sig') + s47 = utils.get_test_data("num/47.fa.sig") + s63 = utils.get_test_data("num/63.fa.sig") # containment and estimate ANI will give this error - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('compare', '--containment', '--estimate-ani', '-k', '31', s47, s63, - fail_ok=True) - assert 'must use scaled signatures with --containment, --max-containment, and --avg-containment' in \ - c.last_result.err + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "compare", + "--containment", + "--estimate-ani", + "-k", + "31", + s47, + s63, + fail_ok=True, + ) + assert ( + "must use scaled signatures with --containment, --max-containment, and --avg-containment" + in c.last_result.err + ) assert c.last_result.status != 0 # jaccard + estimate ANI will give this error - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('compare', '--estimate-ani', '-k', '31', s47, s63, - fail_ok=True) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash("compare", "--estimate-ani", "-k", "31", s47, s63, fail_ok=True) - assert 'must use scaled signatures with --estimate-ani' in \ - c.last_result.err + assert "must use scaled signatures with --estimate-ani" in c.last_result.err assert c.last_result.status != 0 diff --git a/tests/test_sourmash_args.py b/tests/test_sourmash_args.py index ae83dc324d..7fcbe2511e 100644 --- a/tests/test_sourmash_args.py +++ b/tests/test_sourmash_args.py @@ -22,9 +22,9 @@ def test_save_signatures_api_none(): # save to sigfile - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) with sourmash_args.SaveSignaturesToLocation(None) as save_sig: @@ -37,12 +37,12 @@ def test_save_signatures_api_none(): def test_save_signatures_to_location_1_sig(runtmp): # save to sigfile.sig - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) - outloc = runtmp.output('foo.sig') + outloc = runtmp.output("foo.sig") with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: print(save_sig) save_sig.add(ss2) @@ -56,9 +56,9 @@ def test_save_signatures_to_location_1_sig(runtmp): def test_save_signatures_to_location_1_stdout(): # save to stdout - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) output_capture = io.StringIO() @@ -77,12 +77,12 @@ def test_save_signatures_to_location_1_stdout(): def test_save_signatures_to_location_1_sig_is_default(runtmp): # save to sigfile.txt - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) - outloc = runtmp.output('foo.txt') + outloc = 
runtmp.output("foo.txt") with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: print(save_sig) save_sig.add(ss2) @@ -96,12 +96,12 @@ def test_save_signatures_to_location_1_sig_is_default(runtmp): def test_save_signatures_to_location_1_sig_gz(runtmp): # save to sigfile.gz - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) - outloc = runtmp.output('foo.sig.gz') + outloc = runtmp.output("foo.sig.gz") with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: print(save_sig) save_sig.add(ss2) @@ -120,12 +120,12 @@ def test_save_signatures_to_location_1_sig_gz(runtmp): def test_save_signatures_to_location_1_zip(runtmp): # save to sigfile.zip - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) - outloc = runtmp.output('foo.zip') + outloc = runtmp.output("foo.zip") with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: print(save_sig) save_sig.add(ss2) @@ -143,33 +143,33 @@ def test_save_signatures_to_location_1_zip(runtmp): def test_save_signatures_to_location_1_zip_bad(runtmp): # try saving to bad sigfile.zip - sig2 = utils.get_test_data('2.fa.sig') - ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') - ss47 = sourmash.load_one_signature(sig47, ksize=31) + sig2 = utils.get_test_data("2.fa.sig") + sourmash.load_one_signature(sig2, ksize=31) + sig47 = utils.get_test_data("47.fa.sig") + sourmash.load_one_signature(sig47, ksize=31) - outloc = runtmp.output('foo.zip') + outloc = runtmp.output("foo.zip") # create bad zip: - with open(outloc, 'wt') as fp: + with open(outloc, "w"): pass # now check for error with pytest.raises(ValueError) as exc: - with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: + with sourmash_args.SaveSignaturesToLocation(outloc): pass - assert 'cannot be opened as a zip file' in str(exc) + assert "cannot be opened as a zip file" in str(exc) def test_save_signatures_to_location_1_zip_dup(runtmp): # save to sigfile.zip - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) - outloc = runtmp.output('foo.zip') + outloc = runtmp.output("foo.zip") with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: print(save_sig) save_sig.add(ss2) @@ -178,11 +178,11 @@ def test_save_signatures_to_location_1_zip_dup(runtmp): # here we have to change the names so the sig content is different; # exact duplicates will not be saved, otherwise. ss2 = ss2.to_mutable() - ss2.name = 'different name for ss2' + ss2.name = "different name for ss2" save_sig.add(ss2) ss47 = ss47.to_mutable() - ss47.name = 'different name for ss47' + ss47.name = "different name for ss47" save_sig.add(ss47) # can we open as a .zip file? @@ -197,13 +197,13 @@ def test_save_signatures_to_location_1_zip_dup(runtmp): def test_save_signatures_to_location_2_zip_add(runtmp): # create sigfile.zip; then, add a new signature. 
- sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) # add only ss2 - outloc = runtmp.output('foo.zip') + outloc = runtmp.output("foo.zip") with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: print(save_sig) save_sig.add(ss2) @@ -230,13 +230,13 @@ def test_save_signatures_to_location_2_zip_add(runtmp): def test_save_signatures_to_location_2_zip_add_dup(runtmp): # create sigfile.zip; then, add a new signature, plus a ~duplicate. - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) # add only ss2 - outloc = runtmp.output('foo.zip') + outloc = runtmp.output("foo.zip") with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: print(save_sig) save_sig.add(ss2) @@ -257,8 +257,9 @@ def test_save_signatures_to_location_2_zip_add_dup(runtmp): # add ss2; here we have to change the names so the sig content is # different exact duplicates will not be saved, otherwise. import copy + ss2copy = ss2.to_mutable() - ss2copy.name = 'different name for ss2' + ss2copy.name = "different name for ss2" save_sig.add(ss2copy) # updated file should contain all three. @@ -271,15 +272,15 @@ def test_save_signatures_to_location_2_zip_add_dup(runtmp): def test_save_signatures_to_location_3_zip_add_fail(runtmp): # create sigfile.zip using zipfile, then try to add to it (& fail) - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') - ss47 = sourmash.load_one_signature(sig47, ksize=31) + sig47 = utils.get_test_data("47.fa.sig") + sourmash.load_one_signature(sig47, ksize=31) # add only ss2, using zipfile API - outloc = runtmp.output('foo.zip') - with zipfile.ZipFile(outloc, 'x') as zf: - with zf.open('xyz.sig', 'w') as fp: + outloc = runtmp.output("foo.zip") + with zipfile.ZipFile(outloc, "x") as zf: + with zf.open("xyz.sig", "w") as fp: sourmash.save_signatures([ss2], fp=fp, compression=1) # verify it can be loaded, yada yada @@ -289,28 +290,29 @@ def test_save_signatures_to_location_3_zip_add_fail(runtmp): # now, try to open existing file with SaveSignaturesToLocation... with pytest.raises(ValueError) as exc: - with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: + with sourmash_args.SaveSignaturesToLocation(outloc): pass - assert 'Cannot add to existing zipfile' in str(exc) + assert "Cannot add to existing zipfile" in str(exc) def test_save_signatures_to_location_3_zip_add_with_manifest(runtmp): # create sigfile.zip using zipfile, then try to add to it (& fail) - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) # add only ss2, using zipfile API; add manifest manually. 
- outloc = runtmp.output('foo.zip') - with zipfile.ZipFile(outloc, 'x') as zf: - with zf.open('xyz.sig', 'w') as fp: + outloc = runtmp.output("foo.zip") + with zipfile.ZipFile(outloc, "x") as zf: + with zf.open("xyz.sig", "w") as fp: sourmash.save_signatures([ss2], fp=fp, compression=1) # make a manifest row... - row = manifest.CollectionManifest.make_manifest_row(ss2, 'xyz.sig', - include_signature=False) + row = manifest.CollectionManifest.make_manifest_row( + ss2, "xyz.sig", include_signature=False + ) # construct & save manifest mf = manifest.CollectionManifest([row]) @@ -320,7 +322,7 @@ def test_save_signatures_to_location_3_zip_add_with_manifest(runtmp): mf.write_to_csv(manifest_fp, write_header=True) manifest_data = manifest_fp.getvalue().encode("utf-8") - with zf.open(mf_name, 'w') as fp: + with zf.open(mf_name, "w") as fp: fp.write(manifest_data) # fini! made our artisanal hand-crafted zipfile. Now... @@ -345,12 +347,12 @@ def test_save_signatures_to_location_3_zip_add_with_manifest(runtmp): def test_save_signatures_to_location_1_dirout(runtmp): # save to sigout/ (directory) - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) - outloc = runtmp.output('sigout/') + outloc = runtmp.output("sigout/") with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: print(save_sig) save_sig.add(ss2) @@ -366,12 +368,12 @@ def test_save_signatures_to_location_1_dirout(runtmp): def test_save_signatures_to_location_1_dirout_bug_2751(runtmp): # check for 2x compressed sig files - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) - outloc = runtmp.output('sigout/') + outloc = runtmp.output("sigout/") with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: print(save_sig) save_sig.add(ss2) @@ -380,7 +382,7 @@ def test_save_signatures_to_location_1_dirout_bug_2751(runtmp): assert os.path.isdir(outloc) print(os.listdir(outloc)) - outloc2 = runtmp.output('sigout/09a08691ce52952152f0e866a59f6261.sig.gz') + outloc2 = runtmp.output("sigout/09a08691ce52952152f0e866a59f6261.sig.gz") with gzip.open(outloc2, "r") as fp: data = fp.read() print(data) @@ -389,12 +391,12 @@ def test_save_signatures_to_location_1_dirout_bug_2751(runtmp): def test_save_signatures_to_location_1_dirout_duplicate(runtmp): # save to sigout/ (directory) - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) - outloc = runtmp.output('sigout/') + outloc = runtmp.output("sigout/") with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: print(save_sig) save_sig.add(ss2) @@ -411,8 +413,8 @@ def test_save_signatures_to_location_1_dirout_duplicate(runtmp): def test_load_empty_zipfile(runtmp): - outloc = runtmp.output('empty.zip') - with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig: + outloc = runtmp.output("empty.zip") + with sourmash_args.SaveSignaturesToLocation(outloc): pass sigiter = sourmash.load_file_as_signatures(outloc) @@ -422,15 +424,14 @@ def 
test_load_empty_zipfile(runtmp): def test_load_many_sigs_empty_file(runtmp): # make sure load_many_signatures behaves properly on empty file outloc = runtmp.output("empty.sig") - with open(outloc, "wt") as fp: + with open(outloc, "w"): pass progress = sourmash_args.SignatureLoadingProgress() with contextlib.redirect_stderr(io.StringIO()) as errfp: - with pytest.raises(SystemExit) as exc: - for ss, sigloc in sourmash_args.load_many_signatures([outloc], - progress): + with pytest.raises(SystemExit): + for ss, sigloc in sourmash_args.load_many_signatures([outloc], progress): pass err = errfp.getvalue() @@ -442,15 +443,15 @@ def test_load_many_sigs_empty_file(runtmp): def test_load_many_sigs_empty_file_force(runtmp): # make sure load_many_signatures behaves properly on empty file w/force outloc = runtmp.output("empty.sig") - with open(outloc, "wt") as fp: + with open(outloc, "w"): pass progress = sourmash_args.SignatureLoadingProgress() with contextlib.redirect_stderr(io.StringIO()) as errfp: - for ss, sigloc in sourmash_args.load_many_signatures([outloc], - progress, - force=True): + for ss, sigloc in sourmash_args.load_many_signatures( + [outloc], progress, force=True + ): pass err = errfp.getvalue() @@ -461,7 +462,7 @@ def test_load_many_sigs_empty_file_force(runtmp): def test_get_manifest_1(): # basic get_manifest retrieves a manifest - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") idx = sourmash.load_file_as_index(sig47) manifest = sourmash_args.get_manifest(idx) @@ -470,18 +471,18 @@ def test_get_manifest_1(): def test_get_manifest_2_cannot_build(): # test what happens when get_manifest cannot build manifest - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47) idx = LinearIndex([ss47]) - with pytest.raises(SystemExit) as exc: - m = sourmash_args.get_manifest(idx) + with pytest.raises(SystemExit): + sourmash_args.get_manifest(idx) def test_get_manifest_2_cannot_buildno_require(): # test what happens when get_manifest cannot build manifest - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47) idx = LinearIndex([ss47]) @@ -493,11 +494,12 @@ def test_get_manifest_2_cannot_buildno_require(): def test_get_manifest_3_build(): # check that manifest is building - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47) class FakeIndex(LinearIndex): was_called = 0 + def _signatures_with_internal(self): self.was_called = 1 return [(ss47, "fakeiloc")] @@ -510,12 +512,12 @@ def _signatures_with_internal(self): print(m) assert len(m) == 1 - assert m.rows[0]['internal_location'] == "fakeiloc" + assert m.rows[0]["internal_location"] == "fakeiloc" def test_get_manifest_3_build_2(): # check that manifest is building, but only when asked - sig47 = utils.get_test_data('47.fa.sig') + sig47 = utils.get_test_data("47.fa.sig") ss47 = sourmash.load_one_signature(sig47) class FakeIndex(LinearIndex): @@ -546,7 +548,7 @@ def _signatures_with_internal(self): assert m == m3 -class FakeArgs(object): +class FakeArgs: picklist = None include_db_pattern = None exclude_db_pattern = None @@ -567,63 +569,63 @@ def test_pattern_1(): # test just --include-pattern handling args = FakeArgs() args.picklist = None - args.include_db_pattern = 'foo' + args.include_db_pattern = "foo" args.exclude_db_pattern = None pattern_search = 
sourmash_args.load_include_exclude_db_patterns(args)
-    assert pattern_search(['foo', 'bar', 'baz'])
-    assert not pattern_search(['bar', 'bif'])
+    assert pattern_search(["foo", "bar", "baz"])
+    assert not pattern_search(["bar", "bif"])


 def test_pattern_2():
     # test just --exclude-pattern handling
     args = FakeArgs()
     args.picklist = None
-    args.exclude_db_pattern = 'foo'
+    args.exclude_db_pattern = "foo"
     args.include_db_pattern = None

     pattern_search = sourmash_args.load_include_exclude_db_patterns(args)

-    assert not pattern_search(['foo', 'bar', 'baz'])
-    assert pattern_search(['bar', 'baz', 'bif'])
+    assert not pattern_search(["foo", "bar", "baz"])
+    assert pattern_search(["bar", "baz", "bif"])


 def test_pattern_3():
     # test with --picklist and --exclude: should fail
     args = FakeArgs()
     args.picklist = True
-    args.exclude_db_pattern = 'foo'
+    args.exclude_db_pattern = "foo"
     args.include_db_pattern = None

     with pytest.raises(SystemExit):
-        pattern_search = sourmash_args.load_include_exclude_db_patterns(args)
+        sourmash_args.load_include_exclude_db_patterns(args)


 def test_pattern_4():
     # test with --picklist and --include: should fail
     args = FakeArgs()
     args.picklist = True
-    args.include_db_pattern = 'foo'
+    args.include_db_pattern = "foo"
     args.exclude_db_pattern = None

     with pytest.raises(SystemExit):
-        pattern_search = sourmash_args.load_include_exclude_db_patterns(args)
+        sourmash_args.load_include_exclude_db_patterns(args)


 def test_pattern_5():
     # test with --include and --exclude: should fail
     args = FakeArgs()
     args.picklist = None
-    args.exclude_db_pattern = 'foo'
-    args.include_db_pattern = 'bar'
+    args.exclude_db_pattern = "foo"
+    args.include_db_pattern = "bar"

     with pytest.raises(SystemExit):
-        pattern_search = sourmash_args.load_include_exclude_db_patterns(args)
+        sourmash_args.load_include_exclude_db_patterns(args)


 def test_fileinput_csv_1_plain():
     # test basic CSV input
-    testfile = utils.get_test_data('tax/test.taxonomy.csv')
+    testfile = utils.get_test_data("tax/test.taxonomy.csv")

     with sourmash_args.FileInputCSV(testfile) as r:
         rows = list(r)
@@ -633,21 +635,21 @@ def test_fileinput_csv_1_plain():

 def test_fileinput_csv_1_no_such_file(runtmp):
     # test fail to load file
-    noexistfile = runtmp.output('does-not-exist.csv')
+    noexistfile = runtmp.output("does-not-exist.csv")

     with pytest.raises(FileNotFoundError):
-        with sourmash_args.FileInputCSV(noexistfile) as r:
+        with sourmash_args.FileInputCSV(noexistfile):
             pass


 def test_fileinput_csv_2_gz(runtmp):
     # test basic CSV input from gz file
-    testfile = utils.get_test_data('tax/test.taxonomy.csv')
-    gzfile = runtmp.output('test.csv.gz')
+    testfile = utils.get_test_data("tax/test.taxonomy.csv")
+    gzfile = runtmp.output("test.csv.gz")

-    with gzip.open(gzfile, 'wt') as outfp:
-        with open(testfile, 'rt', newline='') as infp:
+    with gzip.open(gzfile, "wt") as outfp:
+        with open(testfile, newline="") as infp:
             outfp.write(infp.read())

     with sourmash_args.FileInputCSV(gzfile) as r:
@@ -658,42 +660,42 @@ def test_fileinput_csv_2_gz(runtmp):

 def test_fileinput_csv_2_gz_not_csv(runtmp):
     # test basic CSV input from gz file that's not CSV - works
-    gzfile = runtmp.output('test.csv.gz')
+    gzfile = runtmp.output("test.csv.gz")

-    with gzip.open(gzfile, 'wt') as outfp:
+    with gzip.open(gzfile, "wt") as outfp:
         outfp.write("hello world!")

     with sourmash_args.FileInputCSV(gzfile) as r:
-        assert r.fieldnames == ['hello world!']
+        assert r.fieldnames == ["hello world!"]


 def test_fileinput_csv_2_gz_bad_version_header(runtmp):
     # test basic CSV input from gz file with bad version header
     # currently this works; not clear to me how it should fail :grin:
-    gzfile = runtmp.output('test.csv.gz')
+    gzfile = runtmp.output("test.csv.gz")

-    with gzip.open(gzfile, 'wt') as outfp:
+    with gzip.open(gzfile, "wt") as outfp:
         outfp.write("# excelsior\nhello world!")

     with sourmash_args.FileInputCSV(gzfile) as r:
-        assert r.fieldnames == ['hello world!']
+        assert r.fieldnames == ["hello world!"]
         print(r.version_info)
-        assert r.version_info == ['excelsior']
+        assert r.version_info == ["excelsior"]


 def test_fileinput_csv_2_zip(runtmp):
     # test CSV input from zip file, with component filename
-    testfile = utils.get_test_data('tax/test.taxonomy.csv')
-    zf_file = runtmp.output('test.zip')
+    testfile = utils.get_test_data("tax/test.taxonomy.csv")
+    zf_file = runtmp.output("test.zip")

-    with zipfile.ZipFile(zf_file, 'w') as outzip:
-        with open(testfile, 'rb') as infp:
-            with outzip.open('XYZ.csv', 'w') as outfp:
+    with zipfile.ZipFile(zf_file, "w") as outzip:
+        with open(testfile, "rb") as infp:
+            with outzip.open("XYZ.csv", "w") as outfp:
                 outfp.write(infp.read())

-    with sourmash_args.FileInputCSV(zf_file, default_csv_name='XYZ.csv') as r:
+    with sourmash_args.FileInputCSV(zf_file, default_csv_name="XYZ.csv") as r:
         rows = list(r)
         assert len(rows) == 6
         print(rows)
@@ -702,20 +704,21 @@ def test_fileinput_csv_2_zip(runtmp):
 def test_fileinput_csv_3_load_manifest():
     # test loading a manifest from a zipfile collection, using
     # FileInputCSV.
-    testfile = utils.get_test_data('prot/all.zip')
-
-    with sourmash_args.FileInputCSV(testfile, default_csv_name='SOURMASH-MANIFEST.csv') as r:
+    testfile = utils.get_test_data("prot/all.zip")

+    with sourmash_args.FileInputCSV(
+        testfile, default_csv_name="SOURMASH-MANIFEST.csv"
+    ) as r:
         rows = list(r)
         assert len(rows) == 8

-        assert r.version_info == ['SOURMASH-MANIFEST-VERSION', '1.0']
+        assert r.version_info == ["SOURMASH-MANIFEST-VERSION", "1.0"]


 def test_fileinput_csv_3_load_manifest_no_default():
     # test loading a manifest from a zipfile collection, using
     # FileInputCSV, but with no default_csv_name - should fail
-    testfile = utils.get_test_data('prot/all.zip')
+    testfile = utils.get_test_data("prot/all.zip")

     with pytest.raises(csv.Error):
         with sourmash_args.FileInputCSV(testfile) as r:
@@ -725,72 +728,71 @@ def test_fileinput_csv_3_load_manifest_no_default():

 def test_fileinput_csv_3_load_manifest_zipfile_obj():
     # test loading a manifest from an open zipfile obj, using
     # FileInputCSV.
-    testfile = utils.get_test_data('prot/all.zip')
+    testfile = utils.get_test_data("prot/all.zip")

     with zipfile.ZipFile(testfile, "r") as zf:
-        with sourmash_args.FileInputCSV(testfile,
-                                        default_csv_name='SOURMASH-MANIFEST.csv',
-                                        zipfile_obj=zf) as r:
+        with sourmash_args.FileInputCSV(
+            testfile, default_csv_name="SOURMASH-MANIFEST.csv", zipfile_obj=zf
+        ) as r:
             rows = list(r)
             assert len(rows) == 8

-            assert r.version_info == ['SOURMASH-MANIFEST-VERSION', '1.0']
+            assert r.version_info == ["SOURMASH-MANIFEST-VERSION", "1.0"]


 def test_fileinput_csv_3_load_manifest_zipfile_obj_no_defualt():
     # test loading a manifest from an open zipfile obj, using
     # FileInputCSV, but with no default csv name => should fail.
-    testfile = utils.get_test_data('prot/all.zip')
+    testfile = utils.get_test_data("prot/all.zip")

     with zipfile.ZipFile(testfile, "r") as zf:
         with pytest.raises(ValueError):
-            with sourmash_args.FileInputCSV(testfile,
-                                            zipfile_obj=zf) as r:
+            with sourmash_args.FileInputCSV(testfile, zipfile_obj=zf):
                 pass


 def test_fileoutput_csv_1(runtmp):
     # test basic behavior
-    outfile = runtmp.output('xxx.csv')
+    outfile = runtmp.output("xxx.csv")
     with sourmash_args.FileOutputCSV(outfile) as fp:
         w = csv.writer(fp)
-        w.writerow(['a', 'b', 'c'])
-        w.writerow(['x', 'y', 'z'])
+        w.writerow(["a", "b", "c"])
+        w.writerow(["x", "y", "z"])

     with open(outfile, newline="") as fp:
         r = csv.DictReader(fp)
         rows = list(r)
         assert len(rows) == 1
         row = rows[0]
-        assert row['a'] == 'x'
-        assert row['b'] == 'y'
-        assert row['c'] == 'z'
+        assert row["a"] == "x"
+        assert row["b"] == "y"
+        assert row["c"] == "z"


 def test_fileoutput_csv_1_gz(runtmp):
     # test basic behavior => gz
-    outfile = runtmp.output('xxx.csv.gz')
+    outfile = runtmp.output("xxx.csv.gz")
     with sourmash_args.FileOutputCSV(outfile) as fp:
         w = csv.writer(fp)
-        w.writerow(['a', 'b', 'c'])
-        w.writerow(['x', 'y', 'z'])
+        w.writerow(["a", "b", "c"])
+        w.writerow(["x", "y", "z"])

-    with gzip.open(outfile, 'rt') as fp:
+    with gzip.open(outfile, "rt") as fp:
         r = csv.DictReader(fp)
         rows = list(r)
         assert len(rows) == 1
         row = rows[0]
-        assert row['a'] == 'x'
-        assert row['b'] == 'y'
-        assert row['c'] == 'z'
+        assert row["a"] == "x"
+        assert row["b"] == "y"
+        assert row["c"] == "z"


 def test_fileoutput_csv_2_stdout():
     # test '-' and 'None' go to sys.stdout
-    with sourmash_args.FileOutputCSV('-') as fp:
+    with sourmash_args.FileOutputCSV("-") as fp:
         assert fp == sys.stdout

     with sourmash_args.FileOutputCSV(None) as fp:
@@ -802,14 +804,14 @@ def test_add_ksize_arg_no_default():
     p = argparse.ArgumentParser()
     add_ksize_arg(p)
     args = p.parse_args()
-    assert args.ksize == None
+    assert args.ksize is None


 def test_add_ksize_arg_no_default_specify():
     # test behavior of cli.utils.add_ksize_arg
     p = argparse.ArgumentParser()
     add_ksize_arg(p)
-    args = p.parse_args(['-k', '21'])
+    args = p.parse_args(["-k", "21"])
     assert args.ksize == 21

@@ -825,17 +827,17 @@ def test_add_ksize_arg_default_31_specify():
     # test behavior of cli.utils.add_ksize_arg
     p = argparse.ArgumentParser()
     add_ksize_arg(p, default=31)
-    args = p.parse_args(['-k', '21'])
+    args = p.parse_args(["-k", "21"])
     assert args.ksize == 21


 def test_bug_2370(runtmp):
     # bug - manifest loading code does not catch gzip.BadGzipFile
-    sigfile = utils.get_test_data('63.fa.sig')
+    sigfile = utils.get_test_data("63.fa.sig")

     # copy sigfile over to a .gz file without compressing it -
-    shutil.copyfile(sigfile, runtmp.output('not_really_gzipped.gz'))
+    shutil.copyfile(sigfile, runtmp.output("not_really_gzipped.gz"))

     # try running sourmash_args.load_file_as_index
-    #runtmp.sourmash('sig', 'describe', runtmp.output('not_really_gzipped.gz'))
-    sourmash_args.load_file_as_index(runtmp.output('not_really_gzipped.gz'))
+    # runtmp.sourmash('sig', 'describe', runtmp.output('not_really_gzipped.gz'))
+    sourmash_args.load_file_as_index(runtmp.output("not_really_gzipped.gz"))
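[The tests above pin down a small contract for FileInputCSV: it accepts plain, gzipped, or zipped CSV input, stashes an optional '#'-prefixed header line into version_info, and otherwise behaves like csv.DictReader — raising FileNotFoundError for missing files and csv.Error/ValueError when a zip member cannot be resolved. For orientation only, here is a minimal sketch of that contract; this is not sourmash's implementation, and the gzip detection and header parsing below are simplified assumptions.]

import csv
import gzip


class MiniFileInputCSV:
    """Sketch: gzip-aware CSV reader with an optional '#'-prefixed version header."""

    def __init__(self, filename):
        self.filename = filename
        self.version_info = []

    def __enter__(self):
        # try gzip first; fall back to plain text if the magic bytes don't match
        fp = gzip.open(self.filename, "rt", newline="")
        try:
            fp.read(1)
            fp.seek(0)
        except gzip.BadGzipFile:
            fp.close()
            fp = open(self.filename, newline="")
        self.fp = fp

        # capture '# SOURMASH-MANIFEST-VERSION,1.0' style headers into version_info
        pos = fp.tell()
        first = fp.readline()
        if first.startswith("#"):
            self.version_info = first[1:].strip().split(",")
        else:
            fp.seek(pos)

        self.reader = csv.DictReader(fp)
        return self

    def __exit__(self, *exc):
        self.fp.close()
        return False

    @property
    def fieldnames(self):
        return self.reader.fieldnames

    def __iter__(self):
        return iter(self.reader)

[Against the gz tests above, this sketch would report version_info == ["excelsior"] and fieldnames == ["hello world!"] for the "# excelsior\nhello world!" input.]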
diff --git a/tests/test_sourmash_compute.py b/tests/test_sourmash_compute.py
index cb3c48fc32..f6f6370785 100644
--- a/tests/test_sourmash_compute.py
+++ b/tests/test_sourmash_compute.py
@@ -27,155 +27,231 @@ def test_do_sourmash_compute():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '31', testdata1],
-                                           in_directory=location)
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash", ["compute", "-k", "31", testdata1], in_directory=location
+        )

-        sigfile = os.path.join(location, 'short.fa.sig')
+        sigfile = os.path.join(location, "short.fa.sig")
         assert os.path.exists(sigfile)

         sig = next(signature.load_signatures(sigfile))
-        assert str(sig).endswith('short.fa')
+        assert str(sig).endswith("short.fa")


 def test_do_sourmash_compute_check_num_bounds_negative(runtmp):
-    c=runtmp
-    testdata1 = utils.get_test_data('short.fa')
-    testdata2 = utils.get_test_data('short2.fa')
-    testdata3 = utils.get_test_data('short3.fa')
-    sigfile = c.output('short.fa.sig')
+    c = runtmp
+    testdata1 = utils.get_test_data("short.fa")
+    testdata2 = utils.get_test_data("short2.fa")
+    testdata3 = utils.get_test_data("short3.fa")
+    sigfile = c.output("short.fa.sig")

     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('compute', '-k', '31', '--num-hashes', '-5', '-o', sigfile, '--merge', '"name"', testdata1, testdata2, testdata3)
-
+        c.run_sourmash(
+            "compute",
+            "-k",
+            "31",
+            "--num-hashes",
+            "-5",
+            "-o",
+            sigfile,
+            "--merge",
+            '"name"',
+            testdata1,
+            testdata2,
+            testdata3,
+        )
+
     assert "ERROR: num value must be positive" in c.last_result.err


 def test_do_sourmash_compute_check_num_bounds_less_than_minimum(runtmp):
-    c=runtmp
-    testdata1 = utils.get_test_data('short.fa')
-    testdata2 = utils.get_test_data('short2.fa')
-    testdata3 = utils.get_test_data('short3.fa')
-    sigfile = c.output('short.fa.sig')
-
-    c.run_sourmash('compute', '-k', '31', '--num-hashes', '25', '-o', sigfile, '--merge', '"name"', testdata1, testdata2, testdata3)
-
+    c = runtmp
+    testdata1 = utils.get_test_data("short.fa")
+    testdata2 = utils.get_test_data("short2.fa")
+    testdata3 = utils.get_test_data("short3.fa")
+    sigfile = c.output("short.fa.sig")
+
+    c.run_sourmash(
+        "compute",
+        "-k",
+        "31",
+        "--num-hashes",
+        "25",
+        "-o",
+        sigfile,
+        "--merge",
+        '"name"',
+        testdata1,
+        testdata2,
+        testdata3,
+    )
+
     assert "WARNING: num value should be >= 50. Continuing anyway." in c.last_result.err


 def test_do_sourmash_compute_check_num_bounds_more_than_maximum(runtmp):
-    c=runtmp
-    testdata1 = utils.get_test_data('short.fa')
-    testdata2 = utils.get_test_data('short2.fa')
-    testdata3 = utils.get_test_data('short3.fa')
-    sigfile = c.output('short.fa.sig')
-
-    c.run_sourmash('compute', '-k', '31', '--num-hashes', '100000', '-o', sigfile, '--merge', '"name"', testdata1, testdata2, testdata3)
-
-    assert "WARNING: num value should be <= 50000. Continuing anyway." in c.last_result.err
+    c = runtmp
+    testdata1 = utils.get_test_data("short.fa")
+    testdata2 = utils.get_test_data("short2.fa")
+    testdata3 = utils.get_test_data("short3.fa")
+    sigfile = c.output("short.fa.sig")
+
+    c.run_sourmash(
+        "compute",
+        "-k",
+        "31",
+        "--num-hashes",
+        "100000",
+        "-o",
+        sigfile,
+        "--merge",
+        '"name"',
+        testdata1,
+        testdata2,
+        testdata3,
+    )
+
+    assert (
+        "WARNING: num value should be <= 50000. Continuing anyway."
+        in c.last_result.err
+    )


 @utils.in_tempdir
 def test_do_sourmash_compute_outdir(c):
-    testdata1 = utils.get_test_data('short.fa')
-    status, out, err = utils.runscript('sourmash',
-                                       ['compute', '-k', '31', testdata1,
-                                        '--outdir', c.location])
+    testdata1 = utils.get_test_data("short.fa")
+    status, out, err = utils.runscript(
+        "sourmash", ["compute", "-k", "31", testdata1, "--outdir", c.location]
+    )

-
-    sigfile = os.path.join(c.location, 'short.fa.sig')
+    sigfile = os.path.join(c.location, "short.fa.sig")
     assert os.path.exists(sigfile)

     sig = next(signature.load_signatures(sigfile))
-    assert str(sig).endswith('short.fa')
+    assert str(sig).endswith("short.fa")


 def test_do_sourmash_compute_output_valid_file():
-    """ Trigger bug #123 """
+    """Trigger bug #123"""
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        testdata2 = utils.get_test_data('short2.fa')
-        testdata3 = utils.get_test_data('short3.fa')
-        sigfile = os.path.join(location, 'short.fa.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        testdata2 = utils.get_test_data("short2.fa")
+        testdata3 = utils.get_test_data("short3.fa")
+        sigfile = os.path.join(location, "short.fa.sig")

-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '31', '-o', sigfile,
-                                            testdata1,
-                                            testdata2, testdata3],
-                                           in_directory=location)
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "31", "-o", sigfile, testdata1, testdata2, testdata3],
+            in_directory=location,
+        )

         assert os.path.exists(sigfile)
-        assert not out # stdout should be empty
+        assert not out  # stdout should be empty

         # is it valid json?
-        with open(sigfile, 'r') as f:
+        with open(sigfile) as f:
             data = json.load(f)

-        filesigs = [sig['filename'] for sig in data]
-        assert all(testdata in filesigs
-                   for testdata in (testdata1, testdata2, testdata3))
+        filesigs = [sig["filename"] for sig in data]
+        assert all(
+            testdata in filesigs for testdata in (testdata1, testdata2, testdata3)
+        )


 def test_do_sourmash_compute_output_stdout_valid():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        testdata2 = utils.get_test_data('short2.fa')
-        testdata3 = utils.get_test_data('short3.fa')
+        testdata1 = utils.get_test_data("short.fa")
+        testdata2 = utils.get_test_data("short2.fa")
+        testdata3 = utils.get_test_data("short3.fa")

-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '31', '-o', '-',
-                                            testdata1,
-                                            testdata2, testdata3],
-                                           in_directory=location)
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "31", "-o", "-", testdata1, testdata2, testdata3],
+            in_directory=location,
+        )

         # is it valid json?
         data = json.loads(out)
-        filesigs = [sig['filename'] for sig in data]
-        assert all(testdata in filesigs
-                   for testdata in (testdata1, testdata2, testdata3))
+        filesigs = [sig["filename"] for sig in data]
+        assert all(
+            testdata in filesigs for testdata in (testdata1, testdata2, testdata3)
+        )


 @utils.in_tempdir
 def test_do_sourmash_compute_output_and_name_valid_file(c):
-    testdata1 = utils.get_test_data('short.fa')
-    testdata2 = utils.get_test_data('short2.fa')
-    testdata3 = utils.get_test_data('short3.fa')
-    sigfile = c.output('short.fa.sig')
-
-    c.run_sourmash('compute', '-k', '31', '-o', sigfile, '--merge', '"name"', testdata1, testdata2, testdata3)
+    testdata1 = utils.get_test_data("short.fa")
+    testdata2 = utils.get_test_data("short2.fa")
+    testdata3 = utils.get_test_data("short3.fa")
+    sigfile = c.output("short.fa.sig")
+
+    c.run_sourmash(
+        "compute",
+        "-k",
+        "31",
+        "-o",
+        sigfile,
+        "--merge",
+        '"name"',
+        testdata1,
+        testdata2,
+        testdata3,
+    )

     assert os.path.exists(sigfile)
-    assert 'calculated 1 signature for 4 sequences taken from 3 files' in c.last_result.err
+    assert (
+        "calculated 1 signature for 4 sequences taken from 3 files" in c.last_result.err
+    )

     # is it valid json?
-    with open(sigfile, 'r') as f:
+    with open(sigfile) as f:
         data = json.load(f)

     assert len(data) == 1

-    sigfile_merged = c.output('short.all.fa.sig')
-    c.run_sourmash('compute', '-k', '31', '-o', sigfile_merged, '--merge', '"name"', testdata1, testdata2, testdata3)
-
-    with open(sigfile_merged, 'r') as f:
+    sigfile_merged = c.output("short.all.fa.sig")
+    c.run_sourmash(
+        "compute",
+        "-k",
+        "31",
+        "-o",
+        sigfile_merged,
+        "--merge",
+        '"name"',
+        testdata1,
+        testdata2,
+        testdata3,
+    )
+
+    with open(sigfile_merged) as f:
         data_merged = json.load(f)

-    assert data[0]['signatures'][0]['mins'] == data_merged[0]['signatures'][0]['mins']
+    assert data[0]["signatures"][0]["mins"] == data_merged[0]["signatures"][0]["mins"]


 @utils.in_tempdir
 def test_do_sourmash_compute_output_and_name_valid_file_outdir(c):
-    testdata1 = utils.get_test_data('short.fa')
-    testdata2 = utils.get_test_data('short2.fa')
-    testdata3 = utils.get_test_data('short3.fa')
-    sigfile = os.path.join(c.location, 'short.fa.sig')
+    testdata1 = utils.get_test_data("short.fa")
+    testdata2 = utils.get_test_data("short2.fa")
+    testdata3 = utils.get_test_data("short3.fa")
+    sigfile = os.path.join(c.location, "short.fa.sig")

-    with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('compute', '-k', '31', '-o', sigfile,
-                       '--merge', '"name"',
-                       testdata1, testdata2, testdata3,
-                       '--outdir', c.location)
+    with pytest.raises(SourmashCommandFailed):
+        c.run_sourmash(
+            "compute",
+            "-k",
+            "31",
+            "-o",
+            sigfile,
+            "--merge",
+            '"name"',
+            testdata1,
+            testdata2,
+            testdata3,
+            "--outdir",
+            c.location,
+        )

     errmsg = c.last_result.err
     assert "ERROR: --output-dir doesn't make sense with -o/--output" in errmsg
@@ -183,103 +259,109 @@ def test_do_sourmash_compute_output_and_name_valid_file_outdir(c):

 def test_do_sourmash_compute_singleton():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '31', '--singleton',
-                                            testdata1],
-                                           in_directory=location)
-
-        sigfile = os.path.join(location, 'short.fa.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "31", "--singleton", testdata1],
+            in_directory=location,
+        )
+
+        sigfile = os.path.join(location, "short.fa.sig")
         assert os.path.exists(sigfile)

         sig = next(signature.load_signatures(sigfile))
-        assert sig.name.endswith('shortName')
+        assert sig.name.endswith("shortName")


 def test_do_sourmash_compute_name():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '31', '--merge', 'foo',
-                                            testdata1, '-o', 'foo.sig'],
-                                           in_directory=location)
-
-        sigfile = os.path.join(location, 'foo.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "31", "--merge", "foo", testdata1, "-o", "foo.sig"],
+            in_directory=location,
+        )
+
+        sigfile = os.path.join(location, "foo.sig")
         assert os.path.exists(sigfile)

         sig = next(signature.load_signatures(sigfile))
-        assert sig.name == 'foo'
+        assert sig.name == "foo"

-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '31', '--name', 'foo',
-                                            testdata1, '-o', 'foo2.sig'],
-                                           in_directory=location)
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "31", "--name", "foo", testdata1, "-o", "foo2.sig"],
+            in_directory=location,
+        )

-        sigfile2 = os.path.join(location, 'foo2.sig')
+        sigfile2 = os.path.join(location, "foo2.sig")
         assert os.path.exists(sigfile2)

         sig2 = next(signature.load_signatures(sigfile))
-        assert sig2.name == 'foo'
+        assert sig2.name == "foo"
         assert sig.name == sig2.name


 def test_do_sourmash_compute_name_fail_no_output():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '31', '--merge', 'foo',
-                                            testdata1],
-                                           in_directory=location,
-                                           fail_ok=True)
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "31", "--merge", "foo", testdata1],
+            in_directory=location,
+            fail_ok=True,
+        )
         assert status == -1


 def test_do_sourmash_compute_merge_fail_no_output():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '31', '--merge', 'foo',
-                                            testdata1],
-                                           in_directory=location,
-                                           fail_ok=True)
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "31", "--merge", "foo", testdata1],
+            in_directory=location,
+            fail_ok=True,
+        )
         assert status == -1

-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '31', '--name', 'foo',
-                                            testdata1],
-                                           in_directory=location,
-                                           fail_ok=True)
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "31", "--name", "foo", testdata1],
+            in_directory=location,
+            fail_ok=True,
+        )
         assert status == -1


 def test_do_sourmash_compute_name_from_first():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short3.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '31', '--name-from-first',
-                                            testdata1],
-                                           in_directory=location)
-
-        sigfile = os.path.join(location, 'short3.fa.sig')
+        testdata1 = utils.get_test_data("short3.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "31", "--name-from-first", testdata1],
+            in_directory=location,
+        )
+
+        sigfile = os.path.join(location, "short3.fa.sig")
         assert os.path.exists(sigfile)

         sig = next(signature.load_signatures(sigfile))
-        assert sig.name == 'firstname'
+        assert sig.name == "firstname"


 def test_do_sourmash_compute_multik():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,31',
-                                            testdata1],
-                                           in_directory=location)
-        outfile = os.path.join(location, 'short.fa.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash", ["compute", "-k", "21,31", testdata1], in_directory=location
+        )
+        outfile = os.path.join(location, "short.fa.sig")
         assert os.path.exists(outfile)

         siglist = list(signature.load_signatures(outfile))
         assert len(siglist) == 2

-        ksizes = set([ x.minhash.ksize for x in siglist ])
+        ksizes = set([x.minhash.ksize for x in siglist])
         assert 21 in ksizes
         assert 31 in ksizes
         assert len(ksizes) == 2
@@ -287,20 +369,20 @@ def test_do_sourmash_compute_multik():

 def test_do_sourmash_compute_multik_with_protein():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,30',
-                                            '--protein',
-                                            testdata1],
-                                           in_directory=location)
-        outfile = os.path.join(location, 'short.fa.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,30", "--protein", testdata1],
+            in_directory=location,
+        )
+        outfile = os.path.join(location, "short.fa.sig")
         assert os.path.exists(outfile)

-        with open(outfile, 'rt') as fp:
+        with open(outfile) as fp:
             sigdata = fp.read()
             siglist = list(signature.load_signatures(sigdata))
             assert len(siglist) == 4

-            ksizes = set([ x.minhash.ksize for x in siglist ])
+            ksizes = set([x.minhash.ksize for x in siglist])
             assert 21 in ksizes
             assert 30 in ksizes
             assert 7 in ksizes
@@ -310,22 +392,24 @@ def test_do_sourmash_compute_multik_with_protein():

 def test_do_sourmash_compute_multik_with_dayhoff():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,30',
-                                            '--dayhoff', '--no-dna',
-                                            testdata1],
-                                           in_directory=location)
-        assert 'Computing only Dayhoff-encoded protein (and not nucleotide) ' \
-               'signatures.' in err
-        outfile = os.path.join(location, 'short.fa.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,30", "--dayhoff", "--no-dna", testdata1],
+            in_directory=location,
+        )
+        assert (
+            "Computing only Dayhoff-encoded protein (and not nucleotide) "
+            "signatures." in err
+        )
+        outfile = os.path.join(location, "short.fa.sig")
         assert os.path.exists(outfile)

-        with open(outfile, 'rt') as fp:
+        with open(outfile) as fp:
             sigdata = fp.read()
             siglist = list(signature.load_signatures(sigdata))
             assert len(siglist) == 2

-            ksizes = set([ x.minhash.ksize for x in siglist ])
+            ksizes = set([x.minhash.ksize for x in siglist])
             assert 7 in ksizes
             assert 10 in ksizes
             assert all(x.minhash.dayhoff for x in siglist)
@@ -334,47 +418,49 @@ def test_do_sourmash_compute_multik_with_dayhoff():

 def test_do_sourmash_compute_multik_with_dayhoff_and_dna():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,30',
-                                            '--dayhoff',
-                                            testdata1],
-                                           in_directory=location)
-        outfile = os.path.join(location, 'short.fa.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,30", "--dayhoff", testdata1],
+            in_directory=location,
+        )
+        outfile = os.path.join(location, "short.fa.sig")
         assert os.path.exists(outfile)

-        with open(outfile, 'rt') as fp:
+        with open(outfile) as fp:
             sigdata = fp.read()
             siglist = list(signature.load_signatures(sigdata))
             assert len(siglist) == 4

-            ksizes = set([ x.minhash.ksize for x in siglist ])
+            ksizes = set([x.minhash.ksize for x in siglist])
             assert 21 in ksizes
             assert 30 in ksizes
             assert 7 in ksizes
             assert 10 in ksizes

-            assert sum(x.minhash.moltype == 'DNA' for x in siglist) == 2
-            assert sum(x.minhash.moltype == 'dayhoff' for x in siglist) == 2
+            assert sum(x.minhash.moltype == "DNA" for x in siglist) == 2
+            assert sum(x.minhash.moltype == "dayhoff" for x in siglist) == 2
             assert len(ksizes) == 4


 def test_do_sourmash_compute_multik_with_hp():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,30',
-                                            '--hp', '--no-dna',
-                                            testdata1],
-                                           in_directory=location)
-        assert 'Computing only hp-encoded protein (and not nucleotide) ' \
-               'signatures.' in err
-        outfile = os.path.join(location, 'short.fa.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,30", "--hp", "--no-dna", testdata1],
+            in_directory=location,
+        )
+        assert (
+            "Computing only hp-encoded protein (and not nucleotide) "
+            "signatures." in err
+        )
+        outfile = os.path.join(location, "short.fa.sig")
         assert os.path.exists(outfile)

-        with open(outfile, 'rt') as fp:
+        with open(outfile) as fp:
             sigdata = fp.read()
             siglist = list(signature.load_signatures(sigdata))
             assert len(siglist) == 2

-            ksizes = set([ x.minhash.ksize for x in siglist ])
+            ksizes = set([x.minhash.ksize for x in siglist])
             assert 7 in ksizes
             assert 10 in ksizes
             assert all(x.minhash.hp for x in siglist)
@@ -383,20 +469,20 @@ def test_do_sourmash_compute_multik_with_hp():

 def test_do_sourmash_compute_multik_with_hp_and_dna():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,30',
-                                            '--hp',
-                                            testdata1],
-                                           in_directory=location)
-        outfile = os.path.join(location, 'short.fa.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,30", "--hp", testdata1],
+            in_directory=location,
+        )
+        outfile = os.path.join(location, "short.fa.sig")
         assert os.path.exists(outfile)

-        with open(outfile, 'rt') as fp:
+        with open(outfile) as fp:
             sigdata = fp.read()
             siglist = list(signature.load_signatures(sigdata))
             assert len(siglist) == 4

-            ksizes = set([ x.minhash.ksize for x in siglist ])
+            ksizes = set([x.minhash.ksize for x in siglist])
             assert 7 in ksizes
             assert 10 in ksizes
             assert 21 in ksizes
@@ -406,99 +492,98 @@ def test_do_sourmash_compute_multik_with_hp_and_dna():

 def test_do_sourmash_compute_multik_with_dayhoff_dna_protein():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,30',
-                                            '--dayhoff', '--protein',
-                                            testdata1],
-                                           in_directory=location)
-        outfile = os.path.join(location, 'short.fa.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,30", "--dayhoff", "--protein", testdata1],
+            in_directory=location,
+        )
+        outfile = os.path.join(location, "short.fa.sig")
         assert os.path.exists(outfile)

-        with open(outfile, 'rt') as fp:
+        with open(outfile) as fp:
             sigdata = fp.read()
             siglist = list(signature.load_signatures(sigdata))
             assert len(siglist) == 6

-            ksizes = set([ x.minhash.ksize for x in siglist ])
+            ksizes = set([x.minhash.ksize for x in siglist])
             assert 21 in ksizes
             assert 30 in ksizes
             assert 7 in ksizes
             assert 10 in ksizes

-            assert sum(x.minhash.moltype == 'DNA' for x in siglist) == 2
-            assert sum(x.minhash.moltype == 'dayhoff' for x in siglist) == 2
-            assert sum(x.minhash.moltype == 'protein' for x in siglist) == 2
+            assert sum(x.minhash.moltype == "DNA" for x in siglist) == 2
+            assert sum(x.minhash.moltype == "dayhoff" for x in siglist) == 2
+            assert sum(x.minhash.moltype == "protein" for x in siglist) == 2
             assert len(ksizes) == 4


 def test_do_sourmash_compute_multik_with_dayhoff_hp_dna_protein():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,30',
-                                            '--dayhoff', '--hp', '--protein',
-                                            testdata1],
-                                           in_directory=location)
-        outfile = os.path.join(location, 'short.fa.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,30", "--dayhoff", "--hp", "--protein", testdata1],
+            in_directory=location,
+        )
+        outfile = os.path.join(location, "short.fa.sig")
         assert os.path.exists(outfile)

-        with open(outfile, 'rt') as fp:
+        with open(outfile) as fp:
             sigdata = fp.read()
             siglist = list(signature.load_signatures(sigdata))
             assert len(siglist) == 8

-            ksizes = set([ x.minhash.ksize for x in siglist ])
+            ksizes = set([x.minhash.ksize for x in siglist])
             assert 7 in ksizes
             assert 10 in ksizes
             assert 21 in ksizes
             assert 30 in ksizes

-            assert sum(x.minhash.moltype == 'DNA' for x in siglist) == 2
-            assert sum(x.minhash.moltype == 'dayhoff' for x in siglist) == 2
-            assert sum(x.minhash.moltype == 'hp' for x in siglist) == 2
+            assert sum(x.minhash.moltype == "DNA" for x in siglist) == 2
+            assert sum(x.minhash.moltype == "dayhoff" for x in siglist) == 2
+            assert sum(x.minhash.moltype == "hp" for x in siglist) == 2
             # 2 = dayhoff, 2 = hp = 4 protein
-            assert sum(x.minhash.moltype == 'protein' for x in siglist) == 2
+            assert sum(x.minhash.moltype == "protein" for x in siglist) == 2
             assert len(ksizes) == 4


 def test_do_sourmash_compute_multik_with_nothing():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,31',
-                                            '--no-protein', '--no-dna',
-                                            testdata1],
-                                           in_directory=location,
-                                           fail_ok=True)
-        outfile = os.path.join(location, 'short.fa.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,31", "--no-protein", "--no-dna", testdata1],
+            in_directory=location,
+            fail_ok=True,
+        )
+        outfile = os.path.join(location, "short.fa.sig")
         assert not os.path.exists(outfile)


 def test_do_sourmash_compute_multik_protein_bad_ksize():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '20,32',
-                                            '--protein', '--no-dna',
-                                            testdata1],
-                                           in_directory=location,
-                                           fail_ok=True)
-        outfile = os.path.join(location, 'short.fa.sig')
+        testdata1 = utils.get_test_data("short.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "20,32", "--protein", "--no-dna", testdata1],
+            in_directory=location,
+            fail_ok=True,
+        )
+        outfile = os.path.join(location, "short.fa.sig")
         assert not os.path.exists(outfile)
-        assert 'protein ksizes must be divisible by 3' in err
+        assert "protein ksizes must be divisible by 3" in err


 @utils.in_tempdir
 def test_do_sourmash_compute_multik_only_protein(c):
     # check sourmash compute with only protein, no nucl
-    testdata1 = utils.get_test_data('short.fa')
-    c.run_sourmash('compute', '-k', '21,30',
-                   '--protein', '--no-dna', testdata1)
-    outfile = os.path.join(c.location, 'short.fa.sig')
+    testdata1 = utils.get_test_data("short.fa")
+    c.run_sourmash("compute", "-k", "21,30", "--protein", "--no-dna", testdata1)
+    outfile = os.path.join(c.location, "short.fa.sig")
     assert os.path.exists(outfile)

-    with open(outfile, 'rt') as fp:
+    with open(outfile) as fp:
         sigdata = fp.read()
         siglist = list(signature.load_signatures(sigdata))
         assert len(siglist) == 2

-        ksizes = set([ x.minhash.ksize for x in siglist ])
+        ksizes = set([x.minhash.ksize for x in siglist])
         assert 7 in ksizes
         assert 10 in ksizes
         assert len(ksizes) == 2
@@ -506,34 +591,40 @@ def test_do_sourmash_compute_multik_only_protein(c):

 def test_do_sourmash_compute_multik_protein_input_bad_ksize():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short-protein.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '20,32',
-                                            '--protein', '--no-dna',
-                                            '--input-is-protein',
-                                            testdata1],
-                                           in_directory=location,
-                                           fail_ok=True)
-        outfile = os.path.join(location, 'short-protein.fa.sig')
+        testdata1 = utils.get_test_data("short-protein.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            [
+                "compute",
+                "-k",
+                "20,32",
+                "--protein",
+                "--no-dna",
+                "--input-is-protein",
+                testdata1,
+            ],
+            in_directory=location,
+            fail_ok=True,
+        )
+        os.path.join(location, "short-protein.fa.sig")
         assert status != 0
-        assert 'protein ksizes must be divisible by 3' in err
+        assert "protein ksizes must be divisible by 3" in err


 @utils.in_tempdir
 def test_do_sourmash_compute_multik_only_protein_no_rna(c):
     # test --no-rna as well (otherwise identical to previous test)
-    testdata1 = utils.get_test_data('short.fa')
+    testdata1 = utils.get_test_data("short.fa")

-    c.run_sourmash('compute', '-k', '21,30',
-                   '--protein', '--no-rna', testdata1)
-    outfile = os.path.join(c.location, 'short.fa.sig')
+    c.run_sourmash("compute", "-k", "21,30", "--protein", "--no-rna", testdata1)
+    outfile = os.path.join(c.location, "short.fa.sig")
     assert os.path.exists(outfile)

-    with open(outfile, 'rt') as fp:
+    with open(outfile) as fp:
         sigdata = fp.read()
         siglist = list(signature.load_signatures(sigdata))
         assert len(siglist) == 2

-        ksizes = set([ x.minhash.ksize for x in siglist ])
+        ksizes = set([x.minhash.ksize for x in siglist])
         assert 7 in ksizes
         assert 10 in ksizes
         assert len(ksizes) == 2
@@ -542,20 +633,20 @@ def test_do_sourmash_compute_multik_only_protein_no_rna(c):

 def test_do_sourmash_compute_protein_bad_sequences():
     """Proper error handling when Ns in dna sequence"""
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.bad.fa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,30',
-                                            '--protein', '--no-dna',
-                                            testdata1],
-                                           in_directory=location)
-        outfile = os.path.join(location, 'short.bad.fa.sig')
+        testdata1 = utils.get_test_data("short.bad.fa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,30", "--protein", "--no-dna", testdata1],
+            in_directory=location,
+        )
+        outfile = os.path.join(location, "short.bad.fa.sig")
         assert os.path.exists(outfile)

-        with open(outfile, 'rt') as fp:
+        with open(outfile) as fp:
         sigdata = fp.read()
         siglist = list(signature.load_signatures(sigdata))
         assert len(siglist) == 2

-        ksizes = set([ x.minhash.ksize for x in siglist ])
+        ksizes = set([x.minhash.ksize for x in siglist])
         assert 7 in ksizes
         assert 10 in ksizes
         assert len(ksizes) == 2
@@ -563,178 +654,176 @@ def test_do_sourmash_compute_protein_bad_sequences():

 def test_do_sourmash_compute_multik_input_is_protein():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('ecoli.faa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,30',
-                                            '--input-is-protein',
-                                            testdata1],
-                                           in_directory=location)
-        outfile = os.path.join(location, 'ecoli.faa.sig')
+        testdata1 = utils.get_test_data("ecoli.faa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,30", "--input-is-protein", testdata1],
+            in_directory=location,
+        )
+        outfile = os.path.join(location, "ecoli.faa.sig")
         assert os.path.exists(outfile)

-        with open(outfile, 'rt') as fp:
+        with open(outfile) as fp:
             sigdata = fp.read()
             siglist = list(signature.load_signatures(sigdata))
             assert len(siglist) == 2

-            ksizes = set([ x.minhash.ksize for x in siglist ])
+            ksizes = set([x.minhash.ksize for x in siglist])
             assert 7 in ksizes
             assert 10 in ksizes
             assert len(ksizes) == 2

-            moltype = set([ x.minhash.moltype == 'protein'
-                            for x in siglist ])
+            moltype = set([x.minhash.moltype == "protein" for x in siglist])
             assert len(moltype) == 1
             assert True in moltype


 def test_do_sourmash_compute_multik_outfile():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        outfile = os.path.join(location, 'FOO.xxx')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,31',
-                                            testdata1, '-o', outfile],
-                                           in_directory=location)
+        testdata1 = utils.get_test_data("short.fa")
+        outfile = os.path.join(location, "FOO.xxx")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,31", testdata1, "-o", outfile],
+            in_directory=location,
+        )
         assert os.path.exists(outfile)

         siglist = list(signature.load_signatures(outfile))
         assert len(siglist) == 2

-        ksizes = set([ x.minhash.ksize for x in siglist ])
+        ksizes = set([x.minhash.ksize for x in siglist])
         assert 21 in ksizes
         assert 31 in ksizes


 def test_do_sourmash_compute_with_scaled_1():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        outfile = os.path.join(location, 'FOO.xxx')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,31',
-                                            '--scaled', '1',
-                                            testdata1, '-o', outfile],
-                                           in_directory=location)
+        testdata1 = utils.get_test_data("short.fa")
+        outfile = os.path.join(location, "FOO.xxx")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,31", "--scaled", "1", testdata1, "-o", outfile],
+            in_directory=location,
+        )
         assert os.path.exists(outfile)

         siglist = list(signature.load_signatures(outfile))
         assert len(siglist) == 2

-        scaled_vals = [ x.minhash.scaled for x in siglist ]
+        scaled_vals = [x.minhash.scaled for x in siglist]
         assert len(scaled_vals) == 2
-        assert set(scaled_vals) == { 1 }
+        assert set(scaled_vals) == {1}


 def test_do_sourmash_compute_with_scaled_2():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        outfile = os.path.join(location, 'FOO.xxx')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,31',
-                                            '--scaled', '2',
-                                            testdata1, '-o', outfile],
-                                           in_directory=location)
+        testdata1 = utils.get_test_data("short.fa")
+        outfile = os.path.join(location, "FOO.xxx")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,31", "--scaled", "2", testdata1, "-o", outfile],
+            in_directory=location,
+        )
         assert os.path.exists(outfile)

         siglist = list(signature.load_signatures(outfile))
         assert len(siglist) == 2

-        max_hashes = [ x.minhash._max_hash for x in siglist ]
+        max_hashes = [x.minhash._max_hash for x in siglist]
         assert len(max_hashes) == 2
-        assert set(max_hashes) == set([ int(2**64 /2.) ])
+        assert set(max_hashes) == set([int(2**64 / 2.0)])


 def test_do_sourmash_compute_with_scaled():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        outfile = os.path.join(location, 'FOO.xxx')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,31',
-                                            '--scaled', '100',
-                                            testdata1, '-o', outfile],
-                                           in_directory=location)
+        testdata1 = utils.get_test_data("short.fa")
+        outfile = os.path.join(location, "FOO.xxx")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,31", "--scaled", "100", testdata1, "-o", outfile],
+            in_directory=location,
+        )
         assert os.path.exists(outfile)

         siglist = list(signature.load_signatures(outfile))
         assert len(siglist) == 2

-        max_hashes = [ x.minhash._max_hash for x in siglist ]
+        max_hashes = [x.minhash._max_hash for x in siglist]
         assert len(max_hashes) == 2
-        assert set(max_hashes) == set([ int(2**64 /100.) ])
+        assert set(max_hashes) == set([int(2**64 / 100.0)])


 def test_do_sourmash_compute_with_bad_scaled():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        outfile = os.path.join(location, 'FOO.xxx')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,31',
-                                            '--scaled', '-1',
-                                            testdata1, '-o', outfile],
-                                           in_directory=location,
-                                           fail_ok=True)
+        testdata1 = utils.get_test_data("short.fa")
+        outfile = os.path.join(location, "FOO.xxx")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,31", "--scaled", "-1", testdata1, "-o", outfile],
+            in_directory=location,
+            fail_ok=True,
+        )

         assert status != 0
-        assert '--scaled value must be >= 1' in err
+        assert "--scaled value must be >= 1" in err

-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,31',
-                                            '--scaled', '1000.5',
-                                            testdata1, '-o', outfile],
-                                           in_directory=location,
-                                           fail_ok=True)
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,31", "--scaled", "1000.5", testdata1, "-o", outfile],
+            in_directory=location,
+            fail_ok=True,
+        )

         assert status != 0
-        assert '--scaled value must be integer value' in err
+        assert "--scaled value must be integer value" in err

-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,31',
-                                            '--scaled', '1e9',
-                                            testdata1, '-o', outfile],
-                                           in_directory=location)
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,31", "--scaled", "1e9", testdata1, "-o", outfile],
+            in_directory=location,
+        )

         assert status == 0
-        assert 'WARNING: scaled value is nonsensical!?' in err
+        assert "WARNING: scaled value is nonsensical!?" in err


 def test_do_sourmash_compute_with_seed():
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('short.fa')
-        outfile = os.path.join(location, 'FOO.xxx')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21,31',
-                                            '--seed', '43',
-                                            testdata1, '-o', outfile],
-                                           in_directory=location)
+        testdata1 = utils.get_test_data("short.fa")
+        outfile = os.path.join(location, "FOO.xxx")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21,31", "--seed", "43", testdata1, "-o", outfile],
+            in_directory=location,
+        )
         assert os.path.exists(outfile)

         siglist = list(signature.load_signatures(outfile))
         assert len(siglist) == 2

-        seeds = [ x.minhash.seed for x in siglist ]
+        seeds = [x.minhash.seed for x in siglist]
         assert len(seeds) == 2
-        assert set(seeds) == set([ 43 ])
+        assert set(seeds) == set([43])


 def test_do_sourmash_check_protein_comparisons():
     # this test checks 2 x 2 protein comparisons with E. coli genes.
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('ecoli.faa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21',
-                                            '--input-is-protein',
-                                            '--singleton',
-                                            testdata1],
-                                           in_directory=location)
-        sig1 = os.path.join(location, 'ecoli.faa.sig')
+        testdata1 = utils.get_test_data("ecoli.faa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21", "--input-is-protein", "--singleton", testdata1],
+            in_directory=location,
+        )
+        sig1 = os.path.join(location, "ecoli.faa.sig")
         assert os.path.exists(sig1)

-        testdata2 = utils.get_test_data('ecoli.genes.fna')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21',
-                                            '--protein', '--no-dna',
-                                            '--singleton',
-                                            testdata2],
-                                           in_directory=location)
-        sig2 = os.path.join(location, 'ecoli.genes.fna.sig')
+        testdata2 = utils.get_test_data("ecoli.genes.fna")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21", "--protein", "--no-dna", "--singleton", testdata2],
+            in_directory=location,
+        )
+        sig2 = os.path.join(location, "ecoli.genes.fna.sig")
         assert os.path.exists(sig2)

         # I'm not sure why load_signatures is randomizing order, but ok.
@@ -745,13 +834,13 @@ def test_do_sourmash_check_protein_comparisons():
         sig1_trans, sig2_trans = sorted(x, key=lambda x: x.name)

         name1 = sig1_aa.name.split()[0]
-        assert name1 == 'NP_414543.1'
+        assert name1 == "NP_414543.1"
         name2 = sig2_aa.name.split()[0]
-        assert name2 == 'NP_414544.1'
+        assert name2 == "NP_414544.1"
         name3 = sig1_trans.name.split()[0]
-        assert name3 == 'gi|556503834:2801-3733'
+        assert name3 == "gi|556503834:2801-3733"
         name4 = sig2_trans.name.split()[0]
-        assert name4 == 'gi|556503834:337-2799'
+        assert name4 == "gi|556503834:337-2799"

         print(name1, name3, round(sig1_aa.similarity(sig1_trans), 3))
         print(name2, name3, round(sig2_aa.similarity(sig1_trans), 3))
@@ -768,11 +857,9 @@ def test_do_sourmash_check_protein_comparisons():
 def test_do_sourmash_check_knowngood_dna_comparisons(c):
     # this test checks against a known good signature calculated
     # by utils/compute-dna-mh-another-way.py
-    testdata1 = utils.get_test_data('ecoli.genes.fna')
-    c.run_sourmash('compute', '-k', '21',
-                   '--singleton', '--dna',
-                   testdata1)
-    sig1 = c.output('ecoli.genes.fna.sig')
+    testdata1 = utils.get_test_data("ecoli.genes.fna")
+    c.run_sourmash("compute", "-k", "21", "--singleton", "--dna", testdata1)
+    sig1 = c.output("ecoli.genes.fna.sig")
     assert os.path.exists(sig1)

     x = list(signature.load_signatures(sig1))
@@ -780,7 +867,7 @@ def test_do_sourmash_check_knowngood_dna_comparisons(c):
     print(sig1.name)
     print(sig2.name)

-    knowngood = utils.get_test_data('benchmark.dna.sig')
+    knowngood = utils.get_test_data("benchmark.dna.sig")
     good = list(signature.load_signatures(knowngood))[0]

     assert sig2.similarity(good) == 1.0
@@ -789,16 +876,15 @@
 @utils.in_tempdir
 def test_do_sourmash_check_knowngood_dna_comparisons_use_rna(c):
     # check the --rna flag; otherwise identical to previous test.
-    testdata1 = utils.get_test_data('ecoli.genes.fna')
-    c.run_sourmash('compute', '-k', '21', '--singleton', '--rna',
-                   testdata1)
-    sig1 = c.output('ecoli.genes.fna.sig')
+    testdata1 = utils.get_test_data("ecoli.genes.fna")
+    c.run_sourmash("compute", "-k", "21", "--singleton", "--rna", testdata1)
+    sig1 = c.output("ecoli.genes.fna.sig")
     assert os.path.exists(sig1)

     x = list(signature.load_signatures(sig1))
     sig1, sig2 = sorted(x, key=lambda x: x.name)

-    knowngood = utils.get_test_data('benchmark.dna.sig')
+    knowngood = utils.get_test_data("benchmark.dna.sig")
     good = list(signature.load_signatures(knowngood))[0]

     assert sig2.similarity(good) == 1.0
@@ -808,20 +894,19 @@ def test_do_sourmash_check_knowngood_input_protein_comparisons():
     # this test checks against a known good signature calculated
     # by utils/compute-input-prot-another-way.py
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('ecoli.faa')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21',
-                                            '--input-is-protein',
-                                            '--singleton',
-                                            testdata1],
-                                           in_directory=location)
-        sig1 = os.path.join(location, 'ecoli.faa.sig')
+        testdata1 = utils.get_test_data("ecoli.faa")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21", "--input-is-protein", "--singleton", testdata1],
+            in_directory=location,
+        )
+        sig1 = os.path.join(location, "ecoli.faa.sig")
         assert os.path.exists(sig1)

         x = list(signature.load_signatures(sig1))
         sig1_aa, sig2_aa = sorted(x, key=lambda x: x.name)

-        knowngood = utils.get_test_data('benchmark.input_prot.sig')
+        knowngood = utils.get_test_data("benchmark.input_prot.sig")
         good_aa = list(signature.load_signatures(knowngood))[0]

         assert sig1_aa.similarity(good_aa) == 1.0
@@ -831,29 +916,36 @@ def test_do_sourmash_check_knowngood_protein_comparisons():
     # this test checks against a known good signature calculated
     # by utils/compute-prot-mh-another-way.py
     with utils.TempDirectory() as location:
-        testdata1 = utils.get_test_data('ecoli.genes.fna')
-        status, out, err = utils.runscript('sourmash',
-                                           ['compute', '-k', '21',
-                                            '--singleton', '--protein',
-                                            '--no-dna',
-                                            testdata1],
-                                           in_directory=location)
-        sig1 = os.path.join(location, 'ecoli.genes.fna.sig')
+        testdata1 = utils.get_test_data("ecoli.genes.fna")
+        status, out, err = utils.runscript(
+            "sourmash",
+            ["compute", "-k", "21", "--singleton", "--protein", "--no-dna", testdata1],
+            in_directory=location,
+        )
+        sig1 = os.path.join(location, "ecoli.genes.fna.sig")
         assert os.path.exists(sig1)

         x = list(signature.load_signatures(sig1))
         sig1_trans, sig2_trans = sorted(x, key=lambda x: x.name)

-        knowngood = utils.get_test_data('benchmark.prot.sig')
+        knowngood = utils.get_test_data("benchmark.prot.sig")
         good_trans = list(signature.load_signatures(knowngood))[0]

         assert sig2_trans.similarity(good_trans) == 1.0


 def test_compute_parameters():
-    args_list = ["compute", "-k", "21,31", "--singleton", "--protein", "--no-dna", "input_file"]
-
-    parser = SourmashParser(prog='sourmash')
+    args_list = [
+        "compute",
+        "-k",
+        "21,31",
+        "--singleton",
+        "--protein",
+        "--no-dna",
+        "input_file",
+    ]
+
+    parser = SourmashParser(prog="sourmash")
     subp = parser.add_subparsers(title="instruction", dest="cmd", metavar="cmd")
     subparser(subp)
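[The test_sourmash_sketch.py changes that follow exercise _signatures_for_sketch_factory, which turns '-p' parameter strings like "k=21,scaled=2000,abund" into compute parameters: comma-separated tokens where k=, num=, scaled=, and seed= require a value, bare moltype words (dna/protein/dayhoff/hp) select an encoding, and anything else raises ValueError. The following is a rough, illustrative tokenizer only — not sourmash's parser, whose defaults, moltype handling, and error messages differ.]

MOLTYPES = {"dna", "protein", "dayhoff", "hp"}
VALUE_KEYS = {"k", "num", "scaled", "seed"}


def parse_param_string(param_str):
    # Sketch only: split "k=21,scaled=2000,abund" into a dict of settings.
    params = {"ksizes": [], "moltype": None, "track_abundance": False}
    for token in param_str.split(","):
        if token in MOLTYPES:
            params["moltype"] = token
        elif token == "abund":
            params["track_abundance"] = True
        elif "=" in token:
            name, _, value = token.partition("=")
            # reject unknown keys ('scaledFOO=...') and empty values ('k=')
            if name not in VALUE_KEYS or not value:
                raise ValueError(f"bad parameter: {token!r}")
            if name == "k":
                params["ksizes"].append(int(value))
            else:
                params[name] = int(value)
        else:
            # bare 'k', 'scaled', etc. without '=value'
            raise ValueError(f"parameter {token!r} requires '=value'")
    return params

[With this sketch, parse_param_string("k=21,scaled"), parse_param_string("k="), and parse_param_string("k=21,scaledFOO=2000,abund") all raise ValueError, matching the *_param_requires_equal and *_override_bad tests below.]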
sourmash @@ -31,55 +30,67 @@ def test_do_sourmash_sketch_check_scaled_bounds_negative(runtmp): - testdata1 = utils.get_test_data('short.fa') + testdata1 = utils.get_test_data("short.fa") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'translate', '-p', 'scaled=-5', testdata1) + runtmp.sourmash("sketch", "translate", "-p", "scaled=-5", testdata1) assert "ERROR: scaled value must be positive" in runtmp.last_result.err def test_do_sourmash_sketch_check_scaled_bounds_less_than_minimum(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch', 'translate', '-p', 'scaled=50', testdata1) - assert "WARNING: scaled value should be >= 100. Continuing anyway." in runtmp.last_result.err + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash("sketch", "translate", "-p", "scaled=50", testdata1) + assert ( + "WARNING: scaled value should be >= 100. Continuing anyway." + in runtmp.last_result.err + ) def test_do_sourmash_sketch_check_scaled_bounds_more_than_maximum(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch', 'translate', '-p', 'scaled=1000000000', testdata1) - assert "WARNING: scaled value should be <= 1e6. Continuing anyway." in runtmp.last_result.err + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash("sketch", "translate", "-p", "scaled=1000000000", testdata1) + assert ( + "WARNING: scaled value should be <= 1e6. Continuing anyway." + in runtmp.last_result.err + ) def test_do_sourmash_sketch_check_num_bounds_negative(runtmp): - testdata1 = utils.get_test_data('short.fa') + testdata1 = utils.get_test_data("short.fa") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'translate', '-p', 'num=-5', testdata1) + runtmp.sourmash("sketch", "translate", "-p", "num=-5", testdata1) assert "ERROR: num value must be positive" in runtmp.last_result.err def test_do_sourmash_sketch_check_num_bounds_less_than_minimum(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch', 'translate', '-p', 'num=25', testdata1) - assert "WARNING: num value should be >= 50. Continuing anyway." in runtmp.last_result.err + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash("sketch", "translate", "-p", "num=25", testdata1) + assert ( + "WARNING: num value should be >= 50. Continuing anyway." + in runtmp.last_result.err + ) def test_do_sourmash_sketch_check_num_bounds_more_than_maximum(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch', 'translate', '-p', 'num=100000', testdata1) - assert "WARNING: num value should be <= 50000. Continuing anyway." in runtmp.last_result.err + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash("sketch", "translate", "-p", "num=100000", testdata1) + assert ( + "WARNING: num value should be <= 50000. Continuing anyway." 
+ in runtmp.last_result.err + ) def test_empty_factory(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory([], None) + _signatures_for_sketch_factory([], None) def test_no_default_moltype_factory_nonempty(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(["k=31"], None) + _signatures_for_sketch_factory(["k=31"], None) def test_factory_no_default_moltype_dna(): - factory = _signatures_for_sketch_factory(['dna'], None) + factory = _signatures_for_sketch_factory(["dna"], None) params_list = list(factory.get_compute_params()) assert len(params_list) == 1 @@ -88,7 +99,7 @@ def test_factory_no_default_moltype_dna(): def test_factory_no_default_moltype_protein(): - factory = _signatures_for_sketch_factory(['protein'], None) + factory = _signatures_for_sketch_factory(["protein"], None) params_list = list(factory.get_compute_params()) assert len(params_list) == 1 @@ -97,16 +108,16 @@ def test_factory_no_default_moltype_protein(): def test_factory_dna_nosplit(): - factory = _signatures_for_sketch_factory(['k=31,k=51'], 'dna') + factory = _signatures_for_sketch_factory(["k=31,k=51"], "dna") params_list = list(factory.get_compute_params(split_ksizes=False)) assert len(params_list) == 1 params = params_list[0] - assert params.ksizes == [31,51] + assert params.ksizes == [31, 51] def test_factory_dna_split(): - factory = _signatures_for_sketch_factory(['k=31,k=51'], 'dna') + factory = _signatures_for_sketch_factory(["k=31,k=51"], "dna") params_list = list(factory.get_compute_params(split_ksizes=True)) assert len(params_list) == 2 @@ -117,7 +128,7 @@ def test_factory_dna_split(): def test_factory_protein_nosplit(): - factory = _signatures_for_sketch_factory(['k=10,k=9'], 'protein') + factory = _signatures_for_sketch_factory(["k=10,k=9"], "protein") params_list = list(factory.get_compute_params(split_ksizes=False)) assert len(params_list) == 1 @@ -126,7 +137,7 @@ def test_factory_protein_nosplit(): def test_factory_protein_split(): - factory = _signatures_for_sketch_factory(['k=10,k=9'], 'protein') + factory = _signatures_for_sketch_factory(["k=10,k=9"], "protein") params_list = list(factory.get_compute_params(split_ksizes=True)) assert len(params_list) == 2 @@ -137,12 +148,12 @@ def test_factory_protein_split(): def test_factory_dna_equal(): - factory1 = _signatures_for_sketch_factory(['dna'], None) + factory1 = _signatures_for_sketch_factory(["dna"], None) params_list1 = list(factory1.get_compute_params()) assert len(params_list1) == 1 params1 = params_list1[0] - factory2 = _signatures_for_sketch_factory([], 'dna') + factory2 = _signatures_for_sketch_factory([], "dna") params_list2 = list(factory2.get_compute_params()) assert len(params_list2) == 1 params2 = params_list2[0] @@ -152,12 +163,12 @@ def test_factory_dna_equal(): def test_factory_protein_equal(): - factory1 = _signatures_for_sketch_factory(['protein'], None) + factory1 = _signatures_for_sketch_factory(["protein"], None) params_list1 = list(factory1.get_compute_params()) assert len(params_list1) == 1 params1 = params_list1[0] - factory2 = _signatures_for_sketch_factory([], 'protein') + factory2 = _signatures_for_sketch_factory([], "protein") params_list2 = list(factory2.get_compute_params()) assert len(params_list2) == 1 params2 = params_list2[0] @@ -167,12 +178,12 @@ def test_factory_protein_equal(): def test_factory_dna_multi_ksize_eq(): - factory1 = _signatures_for_sketch_factory(['k=21,k=31,dna'], None) + factory1 = _signatures_for_sketch_factory(["k=21,k=31,dna"], None) params_list1 = 
list(factory1.get_compute_params()) assert len(params_list1) == 1 params1 = params_list1[0] - factory2 = _signatures_for_sketch_factory(['k=21,k=31'], 'dna') + factory2 = _signatures_for_sketch_factory(["k=21,k=31"], "dna") params_list2 = list(factory2.get_compute_params()) assert len(params_list2) == 1 params2 = params_list2[0] @@ -182,12 +193,12 @@ def test_factory_dna_multi_ksize_eq(): def test_factory_protein_multi_ksize_eq(): - factory1 = _signatures_for_sketch_factory(['k=10,k=11,protein'], None) + factory1 = _signatures_for_sketch_factory(["k=10,k=11,protein"], None) params_list1 = list(factory1.get_compute_params()) assert len(params_list1) == 1 params1 = params_list1[0] - factory2 = _signatures_for_sketch_factory(['k=10,k=11'], 'protein') + factory2 = _signatures_for_sketch_factory(["k=10,k=11"], "protein") params_list2 = list(factory2.get_compute_params()) assert len(params_list2) == 1 params2 = params_list2[0] @@ -197,7 +208,7 @@ def test_factory_protein_multi_ksize_eq(): def test_dna_defaults(): - factory = _signatures_for_sketch_factory([], 'dna') + factory = _signatures_for_sketch_factory([], "dna") params_list = list(factory.get_compute_params()) assert len(params_list) == 1 @@ -219,13 +230,13 @@ def test_dna_defaults(): def test_dna_multiple_ksize(): - factory = _signatures_for_sketch_factory(['k=21,k=31,k=51'], 'dna') + factory = _signatures_for_sketch_factory(["k=21,k=31,k=51"], "dna") params_list = list(factory.get_compute_params()) assert len(params_list) == 1 params = params_list[0] - assert params.ksizes == [21,31,51] + assert params.ksizes == [21, 31, 51] assert params.num_hashes == 0 assert params.scaled == 1000 assert not params.track_abundance @@ -246,8 +257,7 @@ def test_dna_multiple_ksize(): def test_dna_override_1(): - factory = _signatures_for_sketch_factory(['k=21,scaled=2000,abund'], - 'dna') + factory = _signatures_for_sketch_factory(["k=21,scaled=2000,abund"], "dna") params_list = list(factory.get_compute_params()) assert len(params_list) == 1 @@ -266,48 +276,47 @@ def test_dna_override_1(): def test_scaled_param_requires_equal(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k=21,scaled'], 'dna') + _signatures_for_sketch_factory(["k=21,scaled"], "dna") def test_k_param_requires_equal(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k'], 'dna') + _signatures_for_sketch_factory(["k"], "dna") def test_k_param_requires_equal_2(): - with pytest.raises(ValueError) as exc: - factory = _signatures_for_sketch_factory(['k='], 'dna') + with pytest.raises(ValueError): + _signatures_for_sketch_factory(["k="], "dna") def test_seed_param_requires_equal(): - with pytest.raises(ValueError) as exc: - factory = _signatures_for_sketch_factory(['seed='], 'dna') + with pytest.raises(ValueError): + _signatures_for_sketch_factory(["seed="], "dna") def test_num_param_requires_equal(): - with pytest.raises(ValueError) as exc: - factory = _signatures_for_sketch_factory(['num='], 'dna') + with pytest.raises(ValueError): + _signatures_for_sketch_factory(["num="], "dna") def test_dna_override_bad_1(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k=21,scaledFOO=2000,abund'], - 'dna') + _signatures_for_sketch_factory(["k=21,scaledFOO=2000,abund"], "dna") def test_dna_override_bad_2(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k=21,protein'], 'dna') + _signatures_for_sketch_factory(["k=21,protein"], "dna") def test_protein_defaults(): - factory = 
_signatures_for_sketch_factory([], 'protein') + factory = _signatures_for_sketch_factory([], "protein") params_list = list(factory.get_compute_params()) assert len(params_list) == 1 params = params_list[0] - assert params.ksizes == [30] # x3 for now + assert params.ksizes == [30] # x3 for now assert params.num_hashes == 0 assert params.scaled == 200 assert not params.track_abundance @@ -320,14 +329,15 @@ def test_protein_defaults(): def test_protein_override_bad_2(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k=21,dna'], 'protein') + _signatures_for_sketch_factory(["k=21,dna"], "protein") + def test_protein_override_bad_rust_foo(): # mimic 'sourmash sketch protein -p dna' - factory = _signatures_for_sketch_factory([], 'protein') + factory = _signatures_for_sketch_factory([], "protein") # reach in and avoid error checking to construct a bad params_list. - factory.params_list = [('dna', {})] + factory.params_list = [("dna", {})] # now, get sigs... siglist = factory() @@ -335,7 +345,7 @@ def test_protein_override_bad_rust_foo(): sig = siglist[0] # try adding something - testdata1 = utils.get_test_data('ecoli.faa') + testdata1 = utils.get_test_data("ecoli.faa") with screed.open(testdata1) as f: record = next(iter(f)) @@ -346,13 +356,13 @@ def test_protein_override_bad_rust_foo(): def test_dayhoff_defaults(): - factory = _signatures_for_sketch_factory([], 'dayhoff') + factory = _signatures_for_sketch_factory([], "dayhoff") params_list = list(factory.get_compute_params()) assert len(params_list) == 1 params = params_list[0] - assert params.ksizes == [48] # x3 for now + assert params.ksizes == [48] # x3 for now assert params.num_hashes == 0 assert params.scaled == 200 assert not params.track_abundance @@ -365,17 +375,17 @@ def test_dayhoff_defaults(): def test_dayhoff_override_bad_2(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k=21,dna'], 'dayhoff') + _signatures_for_sketch_factory(["k=21,dna"], "dayhoff") def test_hp_defaults(): - factory = _signatures_for_sketch_factory([], 'hp') + factory = _signatures_for_sketch_factory([], "hp") params_list = list(factory.get_compute_params()) assert len(params_list) == 1 params = params_list[0] - assert params.ksizes == [126] # x3 for now + assert params.ksizes == [126] # x3 for now assert params.num_hashes == 0 assert params.scaled == 200 assert not params.track_abundance @@ -388,21 +398,23 @@ def test_hp_defaults(): def test_hp_override_bad_2(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k=21,dna'], 'hp') + _signatures_for_sketch_factory(["k=21,dna"], "hp") def test_multiple_moltypes(): - params_foo = ['k=20,num=500,protein', - 'k=19,num=400,dayhoff,abund', - 'k=30,scaled=200,hp', - 'k=30,scaled=200,seed=58'] - factory = _signatures_for_sketch_factory(params_foo, 'protein') + params_foo = [ + "k=20,num=500,protein", + "k=19,num=400,dayhoff,abund", + "k=30,scaled=200,hp", + "k=30,scaled=200,seed=58", + ] + factory = _signatures_for_sketch_factory(params_foo, "protein") params_list = list(factory.get_compute_params()) assert len(params_list) == 4 params = params_list[0] - assert params.ksizes == [60] # x3, for now. + assert params.ksizes == [60] # x3, for now. assert params.num_hashes == 500 assert params.scaled == 0 assert not params.track_abundance @@ -413,7 +425,7 @@ def test_multiple_moltypes(): assert params.protein params = params_list[1] - assert params.ksizes == [57] # x3, for now. + assert params.ksizes == [57] # x3, for now. 
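
The repeated `# x3 for now` comments in these hunks refer to sourmash's convention of storing protein-family ksizes in DNA space, i.e. three times the amino-acid k: the CLI's `protein` default of k=10 becomes `ksizes == [30]` internally, and `to_param_str()` converts back to the amino-acid value. A minimal sketch of that round trip, assuming `_signatures_for_sketch_factory` is importable from `sourmash.command_sketch` as in this test module:

```python
# Minimal sketch of the "x3" ksize convention exercised by the
# surrounding tests; assumes sourmash is installed and that
# _signatures_for_sketch_factory lives in sourmash.command_sketch.
from sourmash.command_sketch import _signatures_for_sketch_factory

# Defaults per moltype: protein k=10, dayhoff k=16, hp k=42 -- all
# stored internally as 3x the amino-acid ksize.
for moltype, stored_ksize in [("protein", 30), ("dayhoff", 48), ("hp", 126)]:
    factory = _signatures_for_sketch_factory([], moltype)
    (params,) = factory.get_compute_params()
    assert params.ksizes == [stored_ksize]  # DNA-space (3 * aa ksize)
    print(moltype, params.to_param_str())   # reports the aa-space k
```

The default amino-acid ksizes here (protein k=10, dayhoff k=16, hp k=42) match the expected strings in the `test_compute_parameters_to_param_str` parametrization below.
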
assert params.num_hashes == 400 assert params.scaled == 0 assert params.track_abundance @@ -424,7 +436,7 @@ def test_multiple_moltypes(): assert not params.protein params = params_list[2] - assert params.ksizes == [90] # x3, for now. + assert params.ksizes == [90] # x3, for now. assert params.num_hashes == 0 assert params.scaled == 200 assert not params.track_abundance @@ -435,7 +447,7 @@ def test_multiple_moltypes(): assert not params.protein params = params_list[3] - assert params.ksizes == [90] # x3, for now. + assert params.ksizes == [90] # x3, for now. assert params.num_hashes == 0 assert params.scaled == 200 assert not params.track_abundance @@ -446,16 +458,19 @@ def test_multiple_moltypes(): assert params.protein -@pytest.mark.parametrize("input_param_str, expected_output", - [('protein', 'protein,k=10,scaled=200'), - ('dna', 'dna,k=31,scaled=1000'), - ('hp', 'hp,k=42,scaled=200'), - ('dayhoff', 'dayhoff,k=16,scaled=200'), - ('dna,seed=52', 'dna,k=31,scaled=1000,seed=52'), - ('dna,num=500', 'dna,k=31,num=500'), - ('scaled=1100,dna', 'dna,k=31,scaled=1100'), - ('dna,abund', 'dna,k=31,scaled=1000,abund') - ]) +@pytest.mark.parametrize( + "input_param_str, expected_output", + [ + ("protein", "protein,k=10,scaled=200"), + ("dna", "dna,k=31,scaled=1000"), + ("hp", "hp,k=42,scaled=200"), + ("dayhoff", "dayhoff,k=16,scaled=200"), + ("dna,seed=52", "dna,k=31,scaled=1000,seed=52"), + ("dna,num=500", "dna,k=31,num=500"), + ("scaled=1100,dna", "dna,k=31,scaled=1100"), + ("dna,abund", "dna,k=31,scaled=1000,abund"), + ], +) def test_compute_parameters_to_param_str(input_param_str, expected_output): factory = _signatures_for_sketch_factory([input_param_str], None) params_list = list(factory.get_compute_params()) @@ -464,22 +479,18 @@ def test_compute_parameters_to_param_str(input_param_str, expected_output): actual_output_str = params.to_param_str() - assert actual_output_str == expected_output, (actual_output_str, - expected_output) + assert actual_output_str == expected_output, (actual_output_str, expected_output) def test_manifest_row_to_compute_parameters_1(): # test ComputeParameters.from_manifest_row with moltype 'DNA' - row = dict(moltype='DNA', - ksize=21, - num=0, scaled=1000, - with_abundance=1) + row = dict(moltype="DNA", ksize=21, num=0, scaled=1000, with_abundance=1) p = ComputeParameters.from_manifest_row(row) assert p.dna assert not p.protein assert not p.dayhoff assert not p.hp - assert p.moltype == 'DNA' + assert p.moltype == "DNA" assert p.num_hashes == 0 assert p.scaled == 1000 assert p.ksizes == [21] @@ -489,14 +500,11 @@ def test_manifest_row_to_compute_parameters_1(): def test_manifest_row_to_compute_parameters_2(): # test ComputeParameters.from_manifest_row with moltype 'protein' - row = dict(moltype='protein', - ksize=10, - num=0, scaled=200, - with_abundance=1) + row = dict(moltype="protein", ksize=10, num=0, scaled=200, with_abundance=1) p = ComputeParameters.from_manifest_row(row) assert not p.dna assert p.protein - assert p.moltype == 'protein' + assert p.moltype == "protein" assert not p.dayhoff assert not p.hp assert p.num_hashes == 0 @@ -508,15 +516,12 @@ def test_manifest_row_to_compute_parameters_2(): def test_manifest_row_to_compute_parameters_3(): # test ComputeParameters.from_manifest_row with moltype 'dayhoff' - row = dict(moltype='dayhoff', - ksize=12, - num=0, scaled=200, - with_abundance=0) + row = dict(moltype="dayhoff", ksize=12, num=0, scaled=200, with_abundance=0) p = ComputeParameters.from_manifest_row(row) assert not p.dna assert not p.protein assert 
p.dayhoff - assert p.moltype == 'dayhoff' + assert p.moltype == "dayhoff" assert not p.hp assert p.num_hashes == 0 assert p.scaled == 200 @@ -527,16 +532,13 @@ def test_manifest_row_to_compute_parameters_3(): def test_manifest_row_to_compute_parameters_4(): # test ComputeParameters.from_manifest_row with moltype 'hp' - row = dict(moltype='hp', - ksize=32, - num=0, scaled=200, - with_abundance=0) + row = dict(moltype="hp", ksize=32, num=0, scaled=200, with_abundance=0) p = ComputeParameters.from_manifest_row(row) assert not p.dna assert not p.protein assert not p.dayhoff assert p.hp - assert p.moltype == 'hp' + assert p.moltype == "hp" assert p.num_hashes == 0 assert p.scaled == 200 assert p.ksizes == [96] @@ -545,8 +547,17 @@ def test_manifest_row_to_compute_parameters_4(): def test_bad_compute_parameters(): - p = ComputeParameters(ksizes=[31], seed=42, dna=0, protein=0, dayhoff=0, - hp=0, num_hashes=0, track_abundance=True, scaled=1000) + p = ComputeParameters( + ksizes=[31], + seed=42, + dna=0, + protein=0, + dayhoff=0, + hp=0, + num_hashes=0, + track_abundance=True, + scaled=1000, + ) with pytest.raises(AssertionError): p.moltype @@ -557,434 +568,484 @@ def test_bad_compute_parameters(): @utils.in_thisdir def test_do_sourmash_sketchdna_empty(c): with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sketch', 'dna') - assert 'error: no input filenames provided! nothing to do - exiting.' in c.last_result.err + c.run_sourmash("sketch", "dna") + assert ( + "error: no input filenames provided! nothing to do - exiting." + in c.last_result.err + ) @utils.in_thisdir def test_do_sourmash_sketchprotein_empty(c): with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sketch', 'protein') - assert 'error: no input filenames provided! nothing to do - exiting.' in c.last_result.err + c.run_sourmash("sketch", "protein") + assert ( + "error: no input filenames provided! nothing to do - exiting." + in c.last_result.err + ) @utils.in_thisdir def test_do_sourmash_sketchtranslate_empty(c): with pytest.raises(SourmashCommandFailed): - c.run_sourmash('sketch', 'translate') - assert 'error: no input filenames provided! nothing to do - exiting.' in c.last_result.err + c.run_sourmash("sketch", "translate") + assert ( + "error: no input filenames provided! nothing to do - exiting." 
+ in c.last_result.err + ) def test_do_sourmash_sketchdna(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch', 'dna', testdata1) + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash("sketch", "dna", testdata1) - sigfile = runtmp.output('short.fa.sig') + sigfile = runtmp.output("short.fa.sig") assert os.path.exists(sigfile) sig = next(signature.load_signatures(sigfile)) - assert str(sig).endswith('short.fa') + assert str(sig).endswith("short.fa") def test_do_sourmash_sketchdna_check_sequence_succeed(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch', 'dna', testdata1, '--check-sequence') + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash("sketch", "dna", testdata1, "--check-sequence") - sigfile = runtmp.output('short.fa.sig') + sigfile = runtmp.output("short.fa.sig") assert os.path.exists(sigfile) sig = next(signature.load_signatures(sigfile)) - assert str(sig).endswith('short.fa') + assert str(sig).endswith("short.fa") def test_do_sourmash_sketchdna_check_sequence_fail(runtmp): - testdata1 = utils.get_test_data('shewanella.faa') + testdata1 = utils.get_test_data("shewanella.faa") - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('sketch', 'dna', testdata1, '--check-sequence') + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash("sketch", "dna", testdata1, "--check-sequence") err = runtmp.last_result.err print(err) assert "ERROR when reading from " in err - assert "invalid DNA character in input k-mer: MCGIVGAVAQRDVAEILVEGLRRLEYRGYDS" in err + assert ( + "invalid DNA character in input k-mer: MCGIVGAVAQRDVAEILVEGLRRLEYRGYDS" in err + ) def test_do_sourmash_sketchdna_check_sequence_fail_singleton(runtmp): - testdata1 = utils.get_test_data('shewanella.faa') + testdata1 = utils.get_test_data("shewanella.faa") - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('sketch', 'dna', testdata1, '--check-sequence', - '--singleton') + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash("sketch", "dna", testdata1, "--check-sequence", "--singleton") err = runtmp.last_result.err print(err) assert "ERROR when reading from " in err - assert "invalid DNA character in input k-mer: MCGIVGAVAQRDVAEILVEGLRRLEYRGYDS" in err + assert ( + "invalid DNA character in input k-mer: MCGIVGAVAQRDVAEILVEGLRRLEYRGYDS" in err + ) def test_do_sourmash_sketchdna_from_file(runtmp): - testdata1 = utils.get_test_data('short.fa') + testdata1 = utils.get_test_data("short.fa") file_list = runtmp.output("filelist.txt") - with open(file_list, 'wt') as fp: + with open(file_list, "w") as fp: print(testdata1, file=fp) - runtmp.sourmash('sketch', 'dna', '--from-file', file_list) + runtmp.sourmash("sketch", "dna", "--from-file", file_list) - sigfile = runtmp.output('short.fa.sig') + sigfile = runtmp.output("short.fa.sig") assert os.path.exists(sigfile) sig = next(signature.load_signatures(sigfile)) - assert str(sig).endswith('short.fa') + assert str(sig).endswith("short.fa") @utils.in_tempdir def test_do_sourmash_sketchdna_noinput(c): data = "" - cmd = ['sketch', 'dna', '-', '-o', c.output('xxx.sig')] + cmd = ["sketch", "dna", "-", "-o", c.output("xxx.sig")] c.run_sourmash(*cmd, stdin_data=data) print(c.last_result.out) print(c.last_result.err) - sigfile = c.output('xxx.sig') + sigfile = c.output("xxx.sig") assert not os.path.exists(sigfile) - assert 'no sequences found' in c.last_result.err + assert "no sequences found" in c.last_result.err @utils.in_tempdir def 
test_do_sourmash_sketchdna_noinput_singleton(c): data = "" - cmd = ['sketch', 'dna', '-', '-o', c.output('xxx.sig'), '--singleton'] + cmd = ["sketch", "dna", "-", "-o", c.output("xxx.sig"), "--singleton"] c.run_sourmash(*cmd, stdin_data=data) - sigfile = c.output('xxx.sig') + sigfile = c.output("xxx.sig") assert not os.path.exists(sigfile) - assert 'no sequences found' in c.last_result.err + assert "no sequences found" in c.last_result.err @utils.in_tempdir def test_do_sourmash_sketchdna_noinput_merge(c): data = "" - cmd = ['sketch', 'dna', '-', '-o', c.output('xxx.sig'), '--merge', 'name'] + cmd = ["sketch", "dna", "-", "-o", c.output("xxx.sig"), "--merge", "name"] c.run_sourmash(*cmd, stdin_data=data) - sigfile = c.output('xxx.sig') + sigfile = c.output("xxx.sig") assert not os.path.exists(sigfile) - assert 'no sequences found' in c.last_result.err + assert "no sequences found" in c.last_result.err @utils.in_tempdir def test_do_sourmash_sketchdna_outdir(c): - testdata1 = utils.get_test_data('short.fa') - status, out, err = utils.runscript('sourmash', - ['sketch', 'dna', testdata1, - '--outdir', c.location]) + testdata1 = utils.get_test_data("short.fa") + status, out, err = utils.runscript( + "sourmash", ["sketch", "dna", testdata1, "--outdir", c.location] + ) - sigfile = os.path.join(c.location, 'short.fa.sig') + sigfile = os.path.join(c.location, "short.fa.sig") assert os.path.exists(sigfile) sig = next(signature.load_signatures(sigfile)) - assert str(sig).endswith('short.fa') + assert str(sig).endswith("short.fa") @utils.in_tempdir def test_do_sourmash_sketchdna_output_dir(c): # test via --output-dir not --outdir - testdata1 = utils.get_test_data('short.fa') - status, out, err = utils.runscript('sourmash', - ['sketch', 'dna', testdata1, - '--output-dir', c.location]) + testdata1 = utils.get_test_data("short.fa") + status, out, err = utils.runscript( + "sourmash", ["sketch", "dna", testdata1, "--output-dir", c.location] + ) - sigfile = os.path.join(c.location, 'short.fa.sig') + sigfile = os.path.join(c.location, "short.fa.sig") assert os.path.exists(sigfile) sig = next(signature.load_signatures(sigfile)) - assert str(sig).endswith('short.fa') + assert str(sig).endswith("short.fa") def test_do_sourmash_sketchdna_output_valid_file(runtmp): - """ Trigger bug #123 """ - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - testdata3 = utils.get_test_data('short3.fa') - sigfile = runtmp.output('short.fa.sig') + """Trigger bug #123""" + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + testdata3 = utils.get_test_data("short3.fa") + sigfile = runtmp.output("short.fa.sig") - runtmp.sourmash('sketch', 'dna', '-o', sigfile, testdata1, testdata2, testdata3) + runtmp.sourmash("sketch", "dna", "-o", sigfile, testdata1, testdata2, testdata3) assert os.path.exists(sigfile) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty # is it valid json? 
- with open(sigfile, 'r') as f: + with open(sigfile) as f: data = json.load(f) - filesigs = [sig['filename'] for sig in data] - assert all(testdata in filesigs - for testdata in (testdata1, testdata2, testdata3)) + filesigs = [sig["filename"] for sig in data] + assert all(testdata in filesigs for testdata in (testdata1, testdata2, testdata3)) def test_do_sourmash_sketchdna_output_zipfile(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - testdata3 = utils.get_test_data('short3.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + testdata3 = utils.get_test_data("short3.fa") - outfile = runtmp.output('shorts.zip') + outfile = runtmp.output("shorts.zip") - runtmp.sourmash('sketch', 'dna', '-o', outfile, testdata1, testdata2, testdata3) + runtmp.sourmash("sketch", "dna", "-o", outfile, testdata1, testdata2, testdata3) assert os.path.exists(outfile) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty sigs = list(sourmash.load_file_as_signatures(outfile)) assert len(sigs) == 3 def test_do_sourmash_sketchdna_output_stdout_valid(runtmp): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - testdata3 = utils.get_test_data('short3.fa') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + testdata3 = utils.get_test_data("short3.fa") - runtmp.sourmash('sketch', 'dna', '-o', '-', testdata1, testdata2, testdata3) + runtmp.sourmash("sketch", "dna", "-o", "-", testdata1, testdata2, testdata3) # is it valid json? data = json.loads(runtmp.last_result.out) - filesigs = [sig['filename'] for sig in data] - assert all(testdata in filesigs - for testdata in (testdata1, testdata2, testdata3)) + filesigs = [sig["filename"] for sig in data] + assert all(testdata in filesigs for testdata in (testdata1, testdata2, testdata3)) @utils.in_tempdir def test_do_sourmash_sketchdna_output_and_name_valid_file(c): # test --merge of multiple input files - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - testdata3 = utils.get_test_data('short3.fa') - sigfile = c.output('short.fa.sig') - - c.run_sourmash('sketch', 'dna', '-p', 'num=500', '-o', sigfile, '--merge', - '"name"', testdata1, testdata2, testdata3) + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + testdata3 = utils.get_test_data("short3.fa") + sigfile = c.output("short.fa.sig") + + c.run_sourmash( + "sketch", + "dna", + "-p", + "num=500", + "-o", + sigfile, + "--merge", + '"name"', + testdata1, + testdata2, + testdata3, + ) assert os.path.exists(sigfile) - assert 'calculated 1 signature for 4 sequences taken from 3 files' in c.last_result.err + assert ( + "calculated 1 signature for 4 sequences taken from 3 files" in c.last_result.err + ) # is it valid json? 
- with open(sigfile, 'r') as f: + with open(sigfile) as f: data = json.load(f) assert len(data) == 1 - sigfile_merged = c.output('short.all.fa.sig') - c.run_sourmash('sketch', 'dna', '-p', 'num=500', '-o', sigfile_merged, - '--merge', '"name"', testdata1, testdata2, testdata3) - - with open(sigfile_merged, 'r') as f: + sigfile_merged = c.output("short.all.fa.sig") + c.run_sourmash( + "sketch", + "dna", + "-p", + "num=500", + "-o", + sigfile_merged, + "--merge", + '"name"', + testdata1, + testdata2, + testdata3, + ) + + with open(sigfile_merged) as f: data_merged = json.load(f) - assert data[0]['signatures'][0]['mins'] == data_merged[0]['signatures'][0]['mins'] + assert data[0]["signatures"][0]["mins"] == data_merged[0]["signatures"][0]["mins"] @utils.in_tempdir def test_do_sourmash_sketchdna_output_and_name_valid_file_outdir(c): - testdata1 = utils.get_test_data('short.fa') - testdata2 = utils.get_test_data('short2.fa') - testdata3 = utils.get_test_data('short3.fa') - sigfile = os.path.join(c.location, 'short.fa.sig') + testdata1 = utils.get_test_data("short.fa") + testdata2 = utils.get_test_data("short2.fa") + testdata3 = utils.get_test_data("short3.fa") + sigfile = os.path.join(c.location, "short.fa.sig") - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('sketch', 'dna', '-o', sigfile, - '--merge', '"name"', - testdata1, testdata2, testdata3, - '--outdir', c.location) + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "sketch", + "dna", + "-o", + sigfile, + "--merge", + '"name"', + testdata1, + testdata2, + testdata3, + "--outdir", + c.location, + ) errmsg = c.last_result.err assert "ERROR: --output-dir doesn't make sense with -o/--output" in errmsg def test_do_sourmash_sketchdna_singleton(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch', 'dna', '--singleton', testdata1) + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash("sketch", "dna", "--singleton", testdata1) - sigfile = runtmp.output('short.fa.sig') + sigfile = runtmp.output("short.fa.sig") assert os.path.exists(sigfile) sig = next(signature.load_signatures(sigfile)) - assert str(sig).endswith('shortName') + assert str(sig).endswith("shortName") def test_do_sourmash_sketchdna_name(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch', 'dna', '--merge', 'foo', testdata1, '-o', 'foo.sig') + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash("sketch", "dna", "--merge", "foo", testdata1, "-o", "foo.sig") - sigfile = runtmp.output('foo.sig') + sigfile = runtmp.output("foo.sig") assert os.path.exists(sigfile) sig = next(signature.load_signatures(sigfile)) - assert sig.name == 'foo' - - runtmp.sourmash('sketch', 'dna', '--name', 'foo', testdata1, '-o', 'foo2.sig') + assert sig.name == "foo" - sigfile2 = runtmp.output('foo2.sig') + runtmp.sourmash("sketch", "dna", "--name", "foo", testdata1, "-o", "foo2.sig") + + sigfile2 = runtmp.output("foo2.sig") assert os.path.exists(sigfile2) sig2 = next(signature.load_signatures(sigfile)) - assert sig2.name == 'foo' + assert sig2.name == "foo" assert sig.name == sig2.name def test_do_sourmash_sketchdna_name_fail_no_output(runtmp): - testdata1 = utils.get_test_data('short.fa') + testdata1 = utils.get_test_data("short.fa") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'dna', '--merge', 'foo', testdata1) + runtmp.sourmash("sketch", "dna", "--merge", "foo", testdata1) assert runtmp.last_result.status == -1 def test_do_sourmash_sketchdna_fail_no_output(runtmp): - 
testdata1 = utils.get_test_data('short.fa') + testdata1 = utils.get_test_data("short.fa") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'dna', '--merge', 'foo', testdata1) + runtmp.sourmash("sketch", "dna", "--merge", "foo", testdata1) assert runtmp.last_result.status == -1 with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'dna', '--name', 'foo', testdata1) + runtmp.sourmash("sketch", "dna", "--name", "foo", testdata1) assert runtmp.last_result.status == -1 def test_do_sourmash_sketchdna_name_from_first(runtmp): - testdata1 = utils.get_test_data('short3.fa') - runtmp.sourmash('sketch', 'dna', '--name-from-first', testdata1) + testdata1 = utils.get_test_data("short3.fa") + runtmp.sourmash("sketch", "dna", "--name-from-first", testdata1) - sigfile = runtmp.output('short3.fa.sig') + sigfile = runtmp.output("short3.fa.sig") assert os.path.exists(sigfile) sig = next(signature.load_signatures(sigfile)) - assert sig.name == 'firstname' + assert sig.name == "firstname" def test_do_sourmash_sketchdna_multik(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch', 'dna', '-p', 'k=31,k=21', testdata1) + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash("sketch", "dna", "-p", "k=31,k=21", testdata1) - outfile = runtmp.output('short.fa.sig') + outfile = runtmp.output("short.fa.sig") assert os.path.exists(outfile) siglist = list(signature.load_signatures(outfile)) assert len(siglist) == 2 - ksizes = set([ x.minhash.ksize for x in siglist ]) + ksizes = set([x.minhash.ksize for x in siglist]) assert 21 in ksizes assert 31 in ksizes def test_do_sourmash_sketchdna_multik_output(runtmp, sig_save_extension): - testdata1 = utils.get_test_data('short.fa') - outfile = runtmp.output(f'out.{sig_save_extension}') - runtmp.sourmash('sketch', 'dna', '-p', 'k=31,k=21', testdata1, - '-o', outfile) + testdata1 = utils.get_test_data("short.fa") + outfile = runtmp.output(f"out.{sig_save_extension}") + runtmp.sourmash("sketch", "dna", "-p", "k=31,k=21", testdata1, "-o", outfile) print("saved to file/path with extension:", outfile) assert os.path.exists(outfile) siglist = list(sourmash.load_file_as_signatures(outfile)) assert len(siglist) == 2 - ksizes = set([ x.minhash.ksize for x in siglist ]) + ksizes = set([x.minhash.ksize for x in siglist]) assert 21 in ksizes assert 31 in ksizes def test_do_sketch_dna_override_protein_fail(runtmp): - testdata1 = utils.get_test_data('short.fa') + testdata1 = utils.get_test_data("short.fa") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'dna', '-p', 'k=7,num=500,protein', testdata1) + runtmp.sourmash("sketch", "dna", "-p", "k=7,num=500,protein", testdata1) assert runtmp.last_result.status != 0 - assert 'Error creating signatures: Incompatible sketch type' in runtmp.last_result.err + assert ( + "Error creating signatures: Incompatible sketch type" in runtmp.last_result.err + ) def test_do_sketch_protein_override_dna_fail(runtmp): - testdata1 = utils.get_test_data('short.fa') + testdata1 = utils.get_test_data("short.fa") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'protein', '-p', 'k=7,num=500,dna', testdata1) + runtmp.sourmash("sketch", "protein", "-p", "k=7,num=500,dna", testdata1) assert runtmp.last_result.status != 0 - assert 'Error creating signatures: Incompatible sketch type' in runtmp.last_result.err + assert ( + "Error creating signatures: Incompatible sketch type" in runtmp.last_result.err + ) def 
test_do_sketch_translate_multik_with_protein(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch', 'translate', '-p', 'k=7,k=10,num=500', testdata1) + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash("sketch", "translate", "-p", "k=7,k=10,num=500", testdata1) - outfile = runtmp.output('short.fa.sig') + outfile = runtmp.output("short.fa.sig") assert os.path.exists(outfile) - with open(outfile, 'rt') as fp: + with open(outfile) as fp: sigdata = fp.read() siglist = list(signature.load_signatures(sigdata)) assert len(siglist) == 2 - ksizes = set([ x.minhash.ksize for x in siglist ]) + ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes assert 10 in ksizes def test_do_sketch_translate_multik_with_protein_from_file(runtmp): - testdata1 = utils.get_test_data('short.fa') + testdata1 = utils.get_test_data("short.fa") file_list = runtmp.output("filelist.txt") - with open(file_list, 'wt') as fp: + with open(file_list, "w") as fp: print(testdata1, file=fp) - runtmp.sourmash('sketch', 'translate', '-p', 'k=7,k=10,num=500', '--from-file', file_list) + runtmp.sourmash( + "sketch", "translate", "-p", "k=7,k=10,num=500", "--from-file", file_list + ) - outfile = runtmp.output('short.fa.sig') + outfile = runtmp.output("short.fa.sig") assert os.path.exists(outfile) - with open(outfile, 'rt') as fp: + with open(outfile) as fp: sigdata = fp.read() siglist = list(signature.load_signatures(sigdata)) assert len(siglist) == 2 - ksizes = set([ x.minhash.ksize for x in siglist ]) + ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes assert 10 in ksizes def test_do_sketch_translate_multik_with_dayhoff(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch', 'translate', '-p', 'k=7,k=10,num=500', '--dayhoff', testdata1) + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash( + "sketch", "translate", "-p", "k=7,k=10,num=500", "--dayhoff", testdata1 + ) - outfile = runtmp.output('short.fa.sig') + outfile = runtmp.output("short.fa.sig") assert os.path.exists(outfile) - with open(outfile, 'rt') as fp: + with open(outfile) as fp: sigdata = fp.read() siglist = list(signature.load_signatures(sigdata)) assert len(siglist) == 2 - ksizes = set([ x.minhash.ksize for x in siglist ]) + ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes assert 10 in ksizes assert all(x.minhash.dayhoff for x in siglist) def test_do_sketch_translate_multik_with_hp(runtmp): - testdata1 = utils.get_test_data('short.fa') - runtmp.sourmash('sketch', 'translate', '-p', 'k=7,k=10,num=500', '--hp', testdata1) + testdata1 = utils.get_test_data("short.fa") + runtmp.sourmash("sketch", "translate", "-p", "k=7,k=10,num=500", "--hp", testdata1) - outfile = runtmp.output('short.fa.sig') + outfile = runtmp.output("short.fa.sig") assert os.path.exists(outfile) - with open(outfile, 'rt') as fp: + with open(outfile) as fp: sigdata = fp.read() siglist = list(signature.load_signatures(sigdata)) assert len(siglist) == 2 - ksizes = set([ x.minhash.ksize for x in siglist ]) + ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes assert 10 in ksizes assert all(x.minhash.hp for x in siglist) @@ -993,202 +1054,220 @@ def test_do_sketch_translate_multik_with_hp(runtmp): @utils.in_tempdir def test_do_sourmash_sketch_translate_multik_only_protein(c): # check sourmash sketch_translate with only protein, no nucl - testdata1 = utils.get_test_data('short.fa') - c.run_sourmash('sketch', 'translate', '-p', 'k=7,k=10,num=500', - testdata1) - outfile 
= os.path.join(c.location, 'short.fa.sig') + testdata1 = utils.get_test_data("short.fa") + c.run_sourmash("sketch", "translate", "-p", "k=7,k=10,num=500", testdata1) + outfile = os.path.join(c.location, "short.fa.sig") assert os.path.exists(outfile) - with open(outfile, 'rt') as fp: + with open(outfile) as fp: sigdata = fp.read() siglist = list(signature.load_signatures(sigdata)) assert len(siglist) == 2 - ksizes = set([ x.minhash.ksize for x in siglist ]) + ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes assert 10 in ksizes def test_do_sourmash_sketch_translate_bad_sequences(runtmp): """Proper error handling when Ns in dna sequence""" - testdata1 = utils.get_test_data('short.bad.fa') - runtmp.sourmash('sketch', 'translate', '-p', 'k=7,k=10,num=500', testdata1) + testdata1 = utils.get_test_data("short.bad.fa") + runtmp.sourmash("sketch", "translate", "-p", "k=7,k=10,num=500", testdata1) - outfile = runtmp.output('short.bad.fa.sig') + outfile = runtmp.output("short.bad.fa.sig") assert os.path.exists(outfile) - with open(outfile, 'rt') as fp: + with open(outfile) as fp: sigdata = fp.read() siglist = list(signature.load_signatures(sigdata)) assert len(siglist) == 2 - ksizes = set([ x.minhash.ksize for x in siglist ]) + ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes assert 10 in ksizes def test_do_sketch_protein_multik_input(runtmp): - testdata1 = utils.get_test_data('ecoli.faa') - runtmp.sourmash('sketch', 'protein', '-p', 'k=7,k=10,num=500', testdata1) + testdata1 = utils.get_test_data("ecoli.faa") + runtmp.sourmash("sketch", "protein", "-p", "k=7,k=10,num=500", testdata1) - outfile = runtmp.output('ecoli.faa.sig') + outfile = runtmp.output("ecoli.faa.sig") assert os.path.exists(outfile) - with open(outfile, 'rt') as fp: + with open(outfile) as fp: sigdata = fp.read() siglist = list(signature.load_signatures(sigdata)) assert len(siglist) == 2 - ksizes = set([ x.minhash.ksize for x in siglist ]) + ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes assert 10 in ksizes - moltype = set([ x.minhash.moltype == 'protein' - for x in siglist ]) + moltype = set([x.minhash.moltype == "protein" for x in siglist]) assert len(moltype) == 1 assert True in moltype def test_do_sketch_protein_multik_input_from_file(runtmp): - testdata1 = utils.get_test_data('ecoli.faa') + testdata1 = utils.get_test_data("ecoli.faa") file_list = runtmp.output("filelist.txt") - with open(file_list, 'wt') as fp: + with open(file_list, "w") as fp: print(testdata1, file=fp) - runtmp.sourmash('sketch', 'protein', '-p', 'k=7,k=10,num=500', '--from-file', file_list) + runtmp.sourmash( + "sketch", "protein", "-p", "k=7,k=10,num=500", "--from-file", file_list + ) - outfile = runtmp.output('ecoli.faa.sig') + outfile = runtmp.output("ecoli.faa.sig") assert os.path.exists(outfile) - with open(outfile, 'rt') as fp: + with open(outfile) as fp: sigdata = fp.read() siglist = list(signature.load_signatures(sigdata)) assert len(siglist) == 2 - ksizes = set([ x.minhash.ksize for x in siglist ]) + ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes assert 10 in ksizes - moltype = set([ x.minhash.moltype == 'protein' - for x in siglist ]) + moltype = set([x.minhash.moltype == "protein" for x in siglist]) assert len(moltype) == 1 assert True in moltype def test_do_sourmash_sketchdna_multik_outfile(runtmp): - testdata1 = utils.get_test_data('short.fa') - outfile = runtmp.output('FOO.xxx') + testdata1 = utils.get_test_data("short.fa") + outfile = runtmp.output("FOO.xxx") - 
runtmp.sourmash('sketch', 'dna', '-p', 'k=21,k=31', testdata1, '-o', outfile) + runtmp.sourmash("sketch", "dna", "-p", "k=21,k=31", testdata1, "-o", outfile) assert os.path.exists(outfile) siglist = list(signature.load_signatures(outfile)) assert len(siglist) == 2 - ksizes = set([ x.minhash.ksize for x in siglist ]) + ksizes = set([x.minhash.ksize for x in siglist]) assert 21 in ksizes assert 31 in ksizes def test_do_sourmash_sketchdna_with_scaled_1(runtmp): - testdata1 = utils.get_test_data('short.fa') - outfile = runtmp.output('FOO.xxx') + testdata1 = utils.get_test_data("short.fa") + outfile = runtmp.output("FOO.xxx") - runtmp.sourmash('sketch', 'dna', '-p', 'k=21,k=31,scaled=1', testdata1, '-o', outfile) + runtmp.sourmash( + "sketch", "dna", "-p", "k=21,k=31,scaled=1", testdata1, "-o", outfile + ) assert os.path.exists(outfile) siglist = list(signature.load_signatures(outfile)) assert len(siglist) == 2 - scaled_vals = [ x.minhash.scaled for x in siglist ] + scaled_vals = [x.minhash.scaled for x in siglist] assert len(scaled_vals) == 2 - assert set(scaled_vals) == { 1 } + assert set(scaled_vals) == {1} def test_do_sourmash_sketchdna_with_scaled_2(runtmp): - testdata1 = utils.get_test_data('short.fa') - outfile = runtmp.output('FOO.xxx') + testdata1 = utils.get_test_data("short.fa") + outfile = runtmp.output("FOO.xxx") - runtmp.sourmash('sketch', 'dna', '-p', 'k=21,k=31,scaled=2', testdata1, '-o', outfile) + runtmp.sourmash( + "sketch", "dna", "-p", "k=21,k=31,scaled=2", testdata1, "-o", outfile + ) assert os.path.exists(outfile) siglist = list(signature.load_signatures(outfile)) assert len(siglist) == 2 - max_hashes = [ x.minhash._max_hash for x in siglist ] + max_hashes = [x.minhash._max_hash for x in siglist] assert len(max_hashes) == 2 - assert set(max_hashes) == set([ int(2**64 /2.) ]) + assert set(max_hashes) == set([int(2**64 / 2.0)]) def test_do_sourmash_sketchdna_with_scaled(runtmp): - testdata1 = utils.get_test_data('short.fa') - outfile = runtmp.output('FOO.xxx') + testdata1 = utils.get_test_data("short.fa") + outfile = runtmp.output("FOO.xxx") - runtmp.sourmash('sketch', 'dna', '-p', 'k=21,k=31,scaled=100', testdata1, '-o', outfile) + runtmp.sourmash( + "sketch", "dna", "-p", "k=21,k=31,scaled=100", testdata1, "-o", outfile + ) assert os.path.exists(outfile) siglist = list(signature.load_signatures(outfile)) assert len(siglist) == 2 - max_hashes = [ x.minhash._max_hash for x in siglist ] + max_hashes = [x.minhash._max_hash for x in siglist] assert len(max_hashes) == 2 - assert set(max_hashes) == set([ int(2**64 /100.) 
]) + assert set(max_hashes) == set([int(2**64 / 100.0)]) def test_do_sourmash_sketchdna_with_bad_scaled(runtmp): - testdata1 = utils.get_test_data('short.fa') - outfile = runtmp.output('FOO.xxx') + testdata1 = utils.get_test_data("short.fa") + outfile = runtmp.output("FOO.xxx") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'dna', '-p', 'k=21,k=31,scaled=-1', testdata1, '-o', outfile) + runtmp.sourmash( + "sketch", "dna", "-p", "k=21,k=31,scaled=-1", testdata1, "-o", outfile + ) assert runtmp.last_result.status != 0 print(runtmp.last_result.err) - assert 'ERROR: scaled value must be positive' in runtmp.last_result.err + assert "ERROR: scaled value must be positive" in runtmp.last_result.err with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'dna', '-p', 'k=21,k=31,scaled=1000.5', testdata1, '-o', outfile) + runtmp.sourmash( + "sketch", "dna", "-p", "k=21,k=31,scaled=1000.5", testdata1, "-o", outfile + ) assert runtmp.last_result.status != 0 assert "cannot parse scaled='1000.5' as an integer" in runtmp.last_result.err - runtmp.sourmash('sketch', 'dna', '-p', 'k=21,k=31,scaled=1000000000', testdata1, '-o', outfile) + runtmp.sourmash( + "sketch", "dna", "-p", "k=21,k=31,scaled=1000000000", testdata1, "-o", outfile + ) assert runtmp.last_result.status == 0 - print('XXX') + print("XXX") print(runtmp.last_result.err) - assert 'WARNING: scaled value should be <= 1e6. Continuing anyway.' in runtmp.last_result.err + assert ( + "WARNING: scaled value should be <= 1e6. Continuing anyway." + in runtmp.last_result.err + ) def test_do_sketch_with_seed(runtmp): - testdata1 = utils.get_test_data('short.fa') - outfile = runtmp.output('FOO.xxx') + testdata1 = utils.get_test_data("short.fa") + outfile = runtmp.output("FOO.xxx") - runtmp.sourmash('sketch', 'dna', '-p', 'k=21,k=31,seed=43', testdata1, '-o', outfile) + runtmp.sourmash( + "sketch", "dna", "-p", "k=21,k=31,seed=43", testdata1, "-o", outfile + ) assert os.path.exists(outfile) siglist = list(signature.load_signatures(outfile)) assert len(siglist) == 2 - seeds = [ x.minhash.seed for x in siglist ] + seeds = [x.minhash.seed for x in siglist] assert len(seeds) == 2 - assert set(seeds) == set([ 43 ]) + assert set(seeds) == set([43]) def test_do_sourmash_check_protein_comparisons(runtmp): # this test checks 2 x 2 protein comparisons with E. coli genes. - testdata1 = utils.get_test_data('ecoli.faa') + testdata1 = utils.get_test_data("ecoli.faa") - runtmp.sourmash('sketch', 'protein', '-p', 'k=7,num=500', '--singleton', testdata1) + runtmp.sourmash("sketch", "protein", "-p", "k=7,num=500", "--singleton", testdata1) - sig1 = runtmp.output('ecoli.faa.sig') + sig1 = runtmp.output("ecoli.faa.sig") assert os.path.exists(sig1) - testdata2 = utils.get_test_data('ecoli.genes.fna') - runtmp.sourmash('sketch', 'translate', '-p', 'k=7,num=500', '--singleton', testdata2) + testdata2 = utils.get_test_data("ecoli.genes.fna") + runtmp.sourmash( + "sketch", "translate", "-p", "k=7,num=500", "--singleton", testdata2 + ) - sig2 = runtmp.output('ecoli.genes.fna.sig') + sig2 = runtmp.output("ecoli.genes.fna.sig") assert os.path.exists(sig2) # I'm not sure why load_signatures is randomizing order, but ok. 
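
A quick aside on the scaled assertions above: a `scaled=S` sketch keeps only hashes at or below a cutoff of roughly 2**64 / S, so it retains about 1/S of the hash space. The tests pin the cutoff with the same `int(2**64 / S)` expression used in this worked check:

```python
# Worked check of the scaled <-> _max_hash relationship asserted by
# the tests above; purely arithmetic, no sourmash import needed.
for scaled in (1, 2, 100):
    cutoff = int(2**64 / scaled)  # same expression as the tests
    fraction = cutoff / 2**64     # fraction of hash space kept
    print(f"scaled={scaled}: max_hash={cutoff} (~{fraction:.3%} of 2**64)")

# scaled=2 keeps exactly half the hash space, which is what
# test_do_sourmash_sketchdna_with_scaled_2 asserts.
assert int(2**64 / 2) == 2**63
```
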
@@ -1199,13 +1278,13 @@ def test_do_sourmash_check_protein_comparisons(runtmp): sig1_trans, sig2_trans = sorted(x, key=lambda x: x.name) name1 = sig1_aa.name.split()[0] - assert name1 == 'NP_414543.1' + assert name1 == "NP_414543.1" name2 = sig2_aa.name.split()[0] - assert name2 == 'NP_414544.1' + assert name2 == "NP_414544.1" name3 = sig1_trans.name.split()[0] - assert name3 == 'gi|556503834:2801-3733' + assert name3 == "gi|556503834:2801-3733" name4 = sig2_trans.name.split()[0] - assert name4 == 'gi|556503834:337-2799' + assert name4 == "gi|556503834:337-2799" print(name1, name3, round(sig1_aa.similarity(sig1_trans), 3)) print(name2, name3, round(sig2_aa.similarity(sig1_trans), 3)) @@ -1222,10 +1301,9 @@ def test_do_sourmash_check_protein_comparisons(runtmp): def test_do_sourmash_check_knowngood_dna_comparisons(c): # this test checks against a known good signature calculated # by utils/compute-dna-mh-another-way.py - testdata1 = utils.get_test_data('ecoli.genes.fna') - c.run_sourmash('sketch', 'dna', '-p', 'k=21,num=500', - '--singleton', testdata1) - sig1 = c.output('ecoli.genes.fna.sig') + testdata1 = utils.get_test_data("ecoli.genes.fna") + c.run_sourmash("sketch", "dna", "-p", "k=21,num=500", "--singleton", testdata1) + sig1 = c.output("ecoli.genes.fna.sig") assert os.path.exists(sig1) x = list(signature.load_signatures(sig1)) @@ -1234,7 +1312,7 @@ def test_do_sourmash_check_knowngood_dna_comparisons(c): print(sig1.name) print(sig2.name) - knowngood = utils.get_test_data('benchmark.dna.sig') + knowngood = utils.get_test_data("benchmark.dna.sig") good = list(signature.load_signatures(knowngood))[0] assert sig2.similarity(good) == 1.0 @@ -1243,16 +1321,15 @@ def test_do_sourmash_check_knowngood_dna_comparisons(c): @utils.in_tempdir def test_do_sourmash_check_knowngood_dna_comparisons_use_rna(c): # check the rna ; otherwise identical to previous test. 
- testdata1 = utils.get_test_data('ecoli.genes.fna') - c.run_sourmash('sketch', 'rna', '-p', 'k=21,num=500', '--singleton', - testdata1) - sig1 = c.output('ecoli.genes.fna.sig') + testdata1 = utils.get_test_data("ecoli.genes.fna") + c.run_sourmash("sketch", "rna", "-p", "k=21,num=500", "--singleton", testdata1) + sig1 = c.output("ecoli.genes.fna.sig") assert os.path.exists(sig1) x = list(signature.load_signatures(sig1)) sig1, sig2 = sorted(x, key=lambda x: x.name) - knowngood = utils.get_test_data('benchmark.dna.sig') + knowngood = utils.get_test_data("benchmark.dna.sig") good = list(signature.load_signatures(knowngood))[0] assert sig2.similarity(good) == 1.0 @@ -1261,17 +1338,17 @@ def test_do_sourmash_check_knowngood_dna_comparisons_use_rna(c): def test_do_sourmash_check_knowngood_input_protein_comparisons(runtmp): # this test checks against a known good signature calculated # by utils/compute-input-prot-another-way.py - testdata1 = utils.get_test_data('ecoli.faa') + testdata1 = utils.get_test_data("ecoli.faa") - runtmp.sourmash('sketch', 'protein', '-p', 'k=7,num=500', '--singleton', testdata1) + runtmp.sourmash("sketch", "protein", "-p", "k=7,num=500", "--singleton", testdata1) - sig1 = runtmp.output('ecoli.faa.sig') + sig1 = runtmp.output("ecoli.faa.sig") assert os.path.exists(sig1) x = list(signature.load_signatures(sig1)) sig1_aa, sig2_aa = sorted(x, key=lambda x: x.name) - knowngood = utils.get_test_data('benchmark.input_prot.sig') + knowngood = utils.get_test_data("benchmark.input_prot.sig") good_aa = list(signature.load_signatures(knowngood))[0] assert sig1_aa.similarity(good_aa) == 1.0 @@ -1280,17 +1357,19 @@ def test_do_sourmash_check_knowngood_input_protein_comparisons(runtmp): def test_do_sourmash_check_knowngood_protein_comparisons(runtmp): # this test checks against a known good signature calculated # by utils/compute-prot-mh-another-way.py - testdata1 = utils.get_test_data('ecoli.genes.fna') + testdata1 = utils.get_test_data("ecoli.genes.fna") - runtmp.sourmash('sketch', 'translate', '-p', 'k=7,num=500', '--singleton', testdata1) + runtmp.sourmash( + "sketch", "translate", "-p", "k=7,num=500", "--singleton", testdata1 + ) - sig1 = runtmp.output('ecoli.genes.fna.sig') + sig1 = runtmp.output("ecoli.genes.fna.sig") assert os.path.exists(sig1) x = list(signature.load_signatures(sig1)) sig1_trans, sig2_trans = sorted(x, key=lambda x: x.name) - knowngood = utils.get_test_data('benchmark.prot.sig') + knowngood = utils.get_test_data("benchmark.prot.sig") good_trans = list(signature.load_signatures(knowngood))[0] assert sig2_trans.similarity(good_trans) == 1.0 @@ -1298,19 +1377,26 @@ def test_do_sourmash_check_knowngood_protein_comparisons(runtmp): def test_do_sourmash_singleton_multiple_files_no_out_specified(runtmp): # this test checks that --singleton -o works - testdata1 = utils.get_test_data('ecoli.faa') - testdata2 = utils.get_test_data('shewanella.faa') + testdata1 = utils.get_test_data("ecoli.faa") + testdata2 = utils.get_test_data("shewanella.faa") - runtmp.sourmash('sketch', 'protein', '-p', 'k=7', '--singleton', - testdata1, testdata2) + runtmp.sourmash( + "sketch", "protein", "-p", "k=7", "--singleton", testdata1, testdata2 + ) print(runtmp.last_result.err) - assert "saved 2 signature(s) to 'ecoli.faa.sig'. Note: signature license is CC0." in runtmp.last_result.err - assert "saved 2 signature(s) to 'shewanella.faa.sig'. Note: signature license is CC0." 
in runtmp.last_result.err - - sig1 = runtmp.output('ecoli.faa.sig') + assert ( + "saved 2 signature(s) to 'ecoli.faa.sig'. Note: signature license is CC0." + in runtmp.last_result.err + ) + assert ( + "saved 2 signature(s) to 'shewanella.faa.sig'. Note: signature license is CC0." + in runtmp.last_result.err + ) + + sig1 = runtmp.output("ecoli.faa.sig") assert os.path.exists(sig1) - sig2 = runtmp.output('shewanella.faa.sig') + sig2 = runtmp.output("shewanella.faa.sig") assert os.path.exists(sig2) x = list(signature.load_signatures(sig1)) @@ -1324,27 +1410,39 @@ def test_do_sourmash_singleton_multiple_files_no_out_specified(runtmp): assert len(x) == 2 assert len(y) == 2 - idents = [ ss.name.split()[0] for ss in x ] + idents = [ss.name.split()[0] for ss in x] print(idents) - assert set(['NP_414543.1', 'NP_414544.1' ]) == set(idents) + assert set(["NP_414543.1", "NP_414544.1"]) == set(idents) - idents = [ ss.name.split()[0] for ss in y ] + idents = [ss.name.split()[0] for ss in y] print(idents) - assert set(['WP_006079348.1', 'WP_006079351.1']) == set(idents) + assert set(["WP_006079348.1", "WP_006079351.1"]) == set(idents) def test_do_sourmash_singleton_multiple_files_output(runtmp): # this test checks that --singleton -o works - testdata1 = utils.get_test_data('ecoli.faa') - testdata2 = utils.get_test_data('shewanella.faa') - - runtmp.sourmash('sketch', 'protein', '-p', 'k=7', '--singleton', - testdata1, testdata2, '-o', 'output.sig') + testdata1 = utils.get_test_data("ecoli.faa") + testdata2 = utils.get_test_data("shewanella.faa") + + runtmp.sourmash( + "sketch", + "protein", + "-p", + "k=7", + "--singleton", + testdata1, + testdata2, + "-o", + "output.sig", + ) print(runtmp.last_result.err) - assert "saved 4 signature(s) to 'output.sig'. Note: signature license is CC0." in runtmp.last_result.err + assert ( + "saved 4 signature(s) to 'output.sig'. Note: signature license is CC0." + in runtmp.last_result.err + ) - sig1 = runtmp.output('output.sig') + sig1 = runtmp.output("output.sig") assert os.path.exists(sig1) x = list(signature.load_signatures(sig1)) @@ -1353,23 +1451,37 @@ def test_do_sourmash_singleton_multiple_files_output(runtmp): assert len(x) == 4 - idents = [ ss.name.split()[0] for ss in x ] + idents = [ss.name.split()[0] for ss in x] print(idents) - assert set(['NP_414543.1', 'NP_414544.1', 'WP_006079348.1', 'WP_006079351.1']) == set(idents) + assert set( + ["NP_414543.1", "NP_414544.1", "WP_006079348.1", "WP_006079351.1"] + ) == set(idents) def test_do_sourmash_singleton_multiple_files_output_zip(runtmp): # this test checks that --singleton -o works - testdata1 = utils.get_test_data('ecoli.faa') - testdata2 = utils.get_test_data('shewanella.faa') - - runtmp.sourmash('sketch', 'protein', '-p', 'k=7', '--singleton', - testdata1, testdata2, '-o', 'output.zip') + testdata1 = utils.get_test_data("ecoli.faa") + testdata2 = utils.get_test_data("shewanella.faa") + + runtmp.sourmash( + "sketch", + "protein", + "-p", + "k=7", + "--singleton", + testdata1, + testdata2, + "-o", + "output.zip", + ) print(runtmp.last_result.err) - assert "saved 4 signature(s) to 'output.zip'. Note: signature license is CC0." in runtmp.last_result.err + assert ( + "saved 4 signature(s) to 'output.zip'. Note: signature license is CC0." 
+ in runtmp.last_result.err + ) - sig1 = runtmp.output('output.zip') + sig1 = runtmp.output("output.zip") assert os.path.exists(sig1) x = list(sourmash.load_file_as_signatures(sig1)) @@ -1378,35 +1490,37 @@ def test_do_sourmash_singleton_multiple_files_output_zip(runtmp): assert len(x) == 4 - idents = [ ss.name.split()[0] for ss in x ] + idents = [ss.name.split()[0] for ss in x] print(idents) - assert set(['NP_414543.1', 'NP_414544.1', 'WP_006079348.1', 'WP_006079351.1']) == set(idents) + assert set( + ["NP_414543.1", "NP_414544.1", "WP_006079348.1", "WP_006079351.1"] + ) == set(idents) def test_protein_with_stop_codons(runtmp): # compare protein seq with/without stop codons, via cli and also python # apis - testdata1 = utils.get_test_data('ecoli.faa') + testdata1 = utils.get_test_data("ecoli.faa") with screed.open(testdata1) as f: - ecoli_seq = [ record.sequence for record in f ] + ecoli_seq = [record.sequence for record in f] # first, via CLI w/o stop codons - runtmp.sourmash('sketch', 'protein', '-p', 'k=7,scaled=1', testdata1) - sig1 = runtmp.output('ecoli.faa.sig') + runtmp.sourmash("sketch", "protein", "-p", "k=7,scaled=1", testdata1) + sig1 = runtmp.output("ecoli.faa.sig") assert os.path.exists(sig1) x = signature.load_one_signature(sig1) cli_mh1 = x.minhash # second, via CLI w/stop codons - ecoli_stop = runtmp.output('ecoli.stop.faa') - with open(ecoli_stop, 'wt') as fp: + ecoli_stop = runtmp.output("ecoli.stop.faa") + with open(ecoli_stop, "w") as fp: for seq in ecoli_seq: - fp.write(f'>seq\n{seq}*\n') + fp.write(f">seq\n{seq}*\n") - runtmp.sourmash('sketch', 'protein', '-p', 'k=7,scaled=1', ecoli_stop) - sig2 = runtmp.output('ecoli.stop.faa.sig') + runtmp.sourmash("sketch", "protein", "-p", "k=7,scaled=1", ecoli_stop) + sig2 = runtmp.output("ecoli.stop.faa.sig") assert os.path.exists(sig2) x = signature.load_one_signature(sig2) @@ -1420,7 +1534,7 @@ def test_protein_with_stop_codons(runtmp): # now calculate sketch with MinHash and stop codons... py_mh2 = MinHash(n=0, ksize=7, is_protein=True, scaled=1) for seq in ecoli_seq: - py_mh2.add_protein(seq + '*') + py_mh2.add_protein(seq + "*") # and, last, calculate hashes separately with seq_to_hashes h_mh1 = MinHash(n=0, ksize=7, is_protein=True, scaled=1) @@ -1430,7 +1544,7 @@ def test_protein_with_stop_codons(runtmp): h = h_mh1.seq_to_hashes(seq, is_protein=1) h_mh1.add_many(h) - h = h_mh2.seq_to_hashes(seq + '*', is_protein=1) + h = h_mh2.seq_to_hashes(seq + "*", is_protein=1) h_mh2.add_many(h) # check! 
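
The stop-codon tests (this one for `protein`, with `hp` and `dayhoff` variants following) compare three ways of building the same sketch: the CLI, `MinHash.add_protein()`, and hashing first with `MinHash.seq_to_hashes()` followed by `add_many()`. A condensed sketch of the two Python routes, using the same parameters as the tests (k=7, scaled=1) and an arbitrary protein sequence:

```python
# Condensed sketch of the construction routes the stop-codon tests
# compare: building a protein MinHash via add_protein() vs. hashing
# first with seq_to_hashes() and then add_many(). Mirrors the tests'
# parameters (k=7, scaled=1); `seq` is any protein sequence.
from sourmash import MinHash

seq = "MCGIVGAVAQRDVAEILVEGLRRLEYRGYDS"

mh_direct = MinHash(n=0, ksize=7, is_protein=True, scaled=1)
mh_direct.add_protein(seq)

mh_via_hashes = MinHash(n=0, ksize=7, is_protein=True, scaled=1)
mh_via_hashes.add_many(mh_via_hashes.seq_to_hashes(seq, is_protein=1))

# The two routes should agree -- this is the invariant the tests
# above rely on before adding '*' into the mix.
assert set(mh_direct.hashes) == set(mh_via_hashes.hashes)
```
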
@@ -1453,26 +1567,26 @@ def test_hp_with_stop_codons(runtmp): # compare hp seq with/without stop codons, via cli and also python # apis - testdata1 = utils.get_test_data('ecoli.faa') + testdata1 = utils.get_test_data("ecoli.faa") with screed.open(testdata1) as f: - ecoli_seq = [ record.sequence for record in f ] + ecoli_seq = [record.sequence for record in f] # first, via CLI w/o stop codons - runtmp.sourmash('sketch', 'protein', '-p', 'k=7,scaled=1,hp', testdata1) - sig1 = runtmp.output('ecoli.faa.sig') + runtmp.sourmash("sketch", "protein", "-p", "k=7,scaled=1,hp", testdata1) + sig1 = runtmp.output("ecoli.faa.sig") assert os.path.exists(sig1) x = signature.load_one_signature(sig1) cli_mh1 = x.minhash # second, via CLI w/stop codons - ecoli_stop = runtmp.output('ecoli.stop.faa') - with open(ecoli_stop, 'wt') as fp: + ecoli_stop = runtmp.output("ecoli.stop.faa") + with open(ecoli_stop, "w") as fp: for seq in ecoli_seq: - fp.write(f'>seq\n{seq}*\n') + fp.write(f">seq\n{seq}*\n") - runtmp.sourmash('sketch', 'protein', '-p', 'k=7,scaled=1,hp', ecoli_stop) - sig2 = runtmp.output('ecoli.stop.faa.sig') + runtmp.sourmash("sketch", "protein", "-p", "k=7,scaled=1,hp", ecoli_stop) + sig2 = runtmp.output("ecoli.stop.faa.sig") assert os.path.exists(sig2) x = signature.load_one_signature(sig2) @@ -1486,7 +1600,7 @@ def test_hp_with_stop_codons(runtmp): # now calculate sketch with MinHash and stop codons... py_mh2 = MinHash(n=0, ksize=7, hp=True, scaled=1) for seq in ecoli_seq: - py_mh2.add_protein(seq + '*') + py_mh2.add_protein(seq + "*") # and, last, calculate hashes separately with seq_to_hashes h_mh1 = MinHash(n=0, ksize=7, hp=True, scaled=1) @@ -1496,7 +1610,7 @@ def test_hp_with_stop_codons(runtmp): h = h_mh1.seq_to_hashes(seq, is_protein=1) h_mh1.add_many(h) - h = h_mh2.seq_to_hashes(seq + '*', is_protein=1) + h = h_mh2.seq_to_hashes(seq + "*", is_protein=1) h_mh2.add_many(h) # check! @@ -1519,26 +1633,26 @@ def test_dayhoff_with_stop_codons(runtmp): # compare dayhoff seq with/without stop codons, via cli and also python # apis - testdata1 = utils.get_test_data('ecoli.faa') + testdata1 = utils.get_test_data("ecoli.faa") with screed.open(testdata1) as f: - ecoli_seq = [ record.sequence for record in f] + ecoli_seq = [record.sequence for record in f] # first, via CLI w/o stop codons - runtmp.sourmash('sketch', 'protein', '-p', 'k=7,scaled=1,dayhoff', testdata1) - sig1 = runtmp.output('ecoli.faa.sig') + runtmp.sourmash("sketch", "protein", "-p", "k=7,scaled=1,dayhoff", testdata1) + sig1 = runtmp.output("ecoli.faa.sig") assert os.path.exists(sig1) x = signature.load_one_signature(sig1) cli_mh1 = x.minhash # second, via CLI w/stop codons - ecoli_stop = runtmp.output('ecoli.stop.faa') - with open(ecoli_stop, 'wt') as fp: + ecoli_stop = runtmp.output("ecoli.stop.faa") + with open(ecoli_stop, "w") as fp: for seq in ecoli_seq: - fp.write(f'>seq\n{seq}*\n') + fp.write(f">seq\n{seq}*\n") - runtmp.sourmash('sketch', 'protein', '-p', 'k=7,scaled=1,dayhoff', ecoli_stop) - sig2 = runtmp.output('ecoli.stop.faa.sig') + runtmp.sourmash("sketch", "protein", "-p", "k=7,scaled=1,dayhoff", ecoli_stop) + sig2 = runtmp.output("ecoli.stop.faa.sig") assert os.path.exists(sig2) x = signature.load_one_signature(sig2) @@ -1552,7 +1666,7 @@ def test_dayhoff_with_stop_codons(runtmp): # now calculate sketch with MinHash and stop codons... 
py_mh2 = MinHash(n=0, ksize=7, dayhoff=True, scaled=1) for seq in ecoli_seq: - py_mh2.add_protein(seq + '*') + py_mh2.add_protein(seq + "*") # and, last, calculate hashes separately with seq_to_hashes h_mh1 = MinHash(n=0, ksize=7, dayhoff=True, scaled=1) @@ -1562,7 +1676,7 @@ def test_dayhoff_with_stop_codons(runtmp): h = h_mh1.seq_to_hashes(seq, is_protein=1) h_mh1.add_many(h) - h = h_mh2.seq_to_hashes(seq + '*', is_protein=1) + h = h_mh2.seq_to_hashes(seq + "*", is_protein=1) h_mh2.add_many(h) # check! @@ -1586,66 +1700,84 @@ def test_dayhoff_with_stop_codons(runtmp): def test_fromfile_dna(runtmp): # does it run? yes, hopefully. - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) - - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip', '-p', 'dna') + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) + + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-o", + "out.zip", + "-p", + "dna", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert os.path.exists(runtmp.output('out.zip')) - idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + assert os.path.exists(runtmp.output("out.zip")) + idx = sourmash.load_file_as_index(runtmp.output("out.zip")) siglist = list(idx.signatures()) assert len(siglist) == 1 ss = siglist[0] - assert ss.name == 'GCA_903797575 Salmonella enterica' - assert ss.minhash.moltype == 'DNA' + assert ss.name == "GCA_903797575 Salmonella enterica" + assert ss.minhash.moltype == "DNA" assert "** 1 total requested; output 1, skipped 0" in runtmp.last_result.err def test_fromfile_dna_csv_gz(runtmp): # test with a gzipped csv - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) # gzip the CSV file - with open(runtmp.output('sketch_fromfile/salmonella.csv'), 'rb') as infp: - with gzip.open(runtmp.output('salmonella.csv.gz'), 'w') as outfp: + with open(runtmp.output("sketch_fromfile/salmonella.csv"), "rb") as infp: + with gzip.open(runtmp.output("salmonella.csv.gz"), "w") as outfp: outfp.write(infp.read()) - runtmp.sourmash('sketch', 'fromfile', 'salmonella.csv.gz', - '-o', 'out.zip', '-p', 'dna') + runtmp.sourmash( + "sketch", "fromfile", "salmonella.csv.gz", "-o", "out.zip", "-p", "dna" + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert os.path.exists(runtmp.output('out.zip')) - idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + assert os.path.exists(runtmp.output("out.zip")) + idx = sourmash.load_file_as_index(runtmp.output("out.zip")) siglist = list(idx.signatures()) assert len(siglist) == 1 ss = siglist[0] - assert ss.name == 'GCA_903797575 Salmonella enterica' - assert ss.minhash.moltype == 'DNA' + assert ss.name == "GCA_903797575 Salmonella enterica" + assert ss.minhash.moltype == "DNA" assert "** 1 total requested; output 1, skipped 0" in runtmp.last_result.err def test_fromfile_dna_empty(runtmp): # test what happens on empty files. 
- test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) # zero out the file - with gzip.open(runtmp.output('sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz'), 'w') as fp: + with gzip.open( + runtmp.output("sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz"), + "w", + ): pass # now what happens? with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip', '-p', 'dna') + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-o", + "out.zip", + "-p", + "dna", + ) print(runtmp.last_result.out) err = runtmp.last_result.err @@ -1656,125 +1788,172 @@ def test_fromfile_dna_empty(runtmp): def test_fromfile_dna_check_sequence_succeed(runtmp): # does it run? yes, hopefully. - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) - - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip', '-p', 'dna', '--check-sequence') + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) + + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-o", + "out.zip", + "-p", + "dna", + "--check-sequence", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert os.path.exists(runtmp.output('out.zip')) - idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + assert os.path.exists(runtmp.output("out.zip")) + idx = sourmash.load_file_as_index(runtmp.output("out.zip")) siglist = list(idx.signatures()) assert len(siglist) == 1 ss = siglist[0] - assert ss.name == 'GCA_903797575 Salmonella enterica' - assert ss.minhash.moltype == 'DNA' + assert ss.name == "GCA_903797575 Salmonella enterica" + assert ss.minhash.moltype == "DNA" assert "** 1 total requested; output 1, skipped 0" in runtmp.last_result.err def test_fromfile_dna_check_sequence_fail(runtmp): # does it run? yes, hopefully. - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'fromfile', - 'sketch_fromfile/salmonella-badseq.csv', - '-o', 'out.zip', '-p', 'dna', '--check-sequence') + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella-badseq.csv", + "-o", + "out.zip", + "-p", + "dna", + "--check-sequence", + ) print(runtmp.last_result.out) err = runtmp.last_result.err print(err) assert "ERROR when reading from " in err - assert "invalid DNA character in input k-mer: MTNILKLFSRKAGEPLDSLAVKSVRQHLSGD" in err + assert ( + "invalid DNA character in input k-mer: MTNILKLFSRKAGEPLDSLAVKSVRQHLSGD" in err + ) def test_fromfile_dna_and_protein(runtmp): # does it run and produce DNA _and_ protein signatures? 
- test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) - - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip', '-p', 'dna', '-p', 'protein') + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) + + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-o", + "out.zip", + "-p", + "dna", + "-p", + "protein", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert os.path.exists(runtmp.output('out.zip')) - idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + assert os.path.exists(runtmp.output("out.zip")) + idx = sourmash.load_file_as_index(runtmp.output("out.zip")) siglist = list(idx.signatures()) assert len(siglist) == 2 - prot_sig = [ ss for ss in siglist if ss.minhash.moltype == 'protein' ] + prot_sig = [ss for ss in siglist if ss.minhash.moltype == "protein"] assert len(prot_sig) == 1 prot_sig = prot_sig[0] - assert prot_sig.name == 'GCA_903797575 Salmonella enterica' + assert prot_sig.name == "GCA_903797575 Salmonella enterica" - dna_sig = [ ss for ss in siglist if ss.minhash.moltype == 'DNA' ] + dna_sig = [ss for ss in siglist if ss.minhash.moltype == "DNA"] assert len(dna_sig) == 1 dna_sig = dna_sig[0] - assert dna_sig.name == 'GCA_903797575 Salmonella enterica' + assert dna_sig.name == "GCA_903797575 Salmonella enterica" assert "** 2 total requested; output 2, skipped 0" in runtmp.last_result.err def test_fromfile_dna_and_protein_and_hp_and_dayhoff(runtmp): # does it run and produce DNA _and_ protein signatures? - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) - - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip', '-p', 'dna', '-p', 'dna,k=25', - '-p', 'protein', - '-p', 'hp', '-p', 'dayhoff') + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) + + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-o", + "out.zip", + "-p", + "dna", + "-p", + "dna,k=25", + "-p", + "protein", + "-p", + "hp", + "-p", + "dayhoff", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert os.path.exists(runtmp.output('out.zip')) - idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + assert os.path.exists(runtmp.output("out.zip")) + idx = sourmash.load_file_as_index(runtmp.output("out.zip")) siglist = list(idx.signatures()) assert len(siglist) == 5 - prot_sig = [ ss for ss in siglist if ss.minhash.moltype == 'protein' ] + prot_sig = [ss for ss in siglist if ss.minhash.moltype == "protein"] assert len(prot_sig) == 1 prot_sig = prot_sig[0] - assert prot_sig.name == 'GCA_903797575 Salmonella enterica' + assert prot_sig.name == "GCA_903797575 Salmonella enterica" - prot_sig = [ ss for ss in siglist if ss.minhash.moltype == 'hp' ] + prot_sig = [ss for ss in siglist if ss.minhash.moltype == "hp"] assert len(prot_sig) == 1 prot_sig = prot_sig[0] - assert prot_sig.name == 'GCA_903797575 Salmonella enterica' + assert prot_sig.name == "GCA_903797575 Salmonella enterica" - prot_sig = [ ss for ss in siglist if ss.minhash.moltype == 'dayhoff' ] + prot_sig = [ss for ss in siglist if ss.minhash.moltype == "dayhoff"] assert len(prot_sig) == 1 prot_sig = prot_sig[0] - assert prot_sig.name == 'GCA_903797575 Salmonella enterica' + assert prot_sig.name == "GCA_903797575 Salmonella enterica" - 
dna_sig = [ ss for ss in siglist if ss.minhash.moltype == 'DNA' ] + dna_sig = [ss for ss in siglist if ss.minhash.moltype == "DNA"] assert len(dna_sig) == 2 dna_sig = dna_sig[0] - assert dna_sig.name == 'GCA_903797575 Salmonella enterica' + assert dna_sig.name == "GCA_903797575 Salmonella enterica" assert "** 5 total requested; output 5, skipped 0" in runtmp.last_result.err def test_fromfile_dna_and_protein_noname(runtmp): # nothing in the name column - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'fromfile', - 'sketch_fromfile/salmonella-noname.csv', - '-o', 'out.zip', '-p', 'dna', '-p', 'protein') + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella-noname.csv", + "-o", + "out.zip", + "-p", + "dna", + "-p", + "protein", + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -1786,14 +1965,22 @@ def test_fromfile_dna_and_protein_noname(runtmp): def test_fromfile_dna_and_protein_dup_name(runtmp): # duplicate names - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'fromfile', - 'sketch_fromfile/salmonella.csv', - 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip', '-p', 'dna', '-p', 'protein') + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "sketch_fromfile/salmonella.csv", + "-o", + "out.zip", + "-p", + "dna", + "-p", + "protein", + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -1806,15 +1993,23 @@ def test_fromfile_dna_and_protein_dup_name(runtmp): def test_fromfile_dna_and_protein_dup_name_report(runtmp): # duplicate names - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'fromfile', - 'sketch_fromfile/salmonella.csv', - 'sketch_fromfile/salmonella.csv', - '--report-duplicated', - '-o', 'out.zip', '-p', 'dna', '-p', 'protein') + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "sketch_fromfile/salmonella.csv", + "--report-duplicated", + "-o", + "out.zip", + "-p", + "dna", + "-p", + "protein", + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -1827,13 +2022,19 @@ def test_fromfile_dna_and_protein_dup_name_report(runtmp): def test_fromfile_dna_and_protein_missing(runtmp): # test what happens when missing protein. 
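[Editorial aside, not part of the patch: the per-moltype assertions in the multi-`-p` test above can also be checked compactly by tallying moltypes across the output collection. A minimal sketch, using only calls that already appear in these tests (sourmash.load_file_as_index, Index.signatures, MinHash.moltype):]

    from collections import Counter

    import sourmash

    idx = sourmash.load_file_as_index("out.zip")
    by_moltype = Counter(ss.minhash.moltype for ss in idx.signatures())

    # -p dna and -p dna,k=25 yield two DNA sketches; protein/hp/dayhoff one each.
    assert by_moltype == {"DNA": 2, "protein": 1, "hp": 1, "dayhoff": 1}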
- test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sketch', 'fromfile', - 'sketch_fromfile/salmonella-missing.csv', - '-o', 'out.zip', '-p', 'protein') + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella-missing.csv", + "-o", + "out.zip", + "-p", + "protein", + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -1841,19 +2042,29 @@ def test_fromfile_dna_and_protein_missing(runtmp): print(out) print(err) - assert "WARNING: fromfile entry 'GCA_903797575 Salmonella enterica' is missing a proteome" in err + assert ( + "WARNING: fromfile entry 'GCA_903797575 Salmonella enterica' is missing a proteome" + in err + ) assert "** ERROR: we cannot build some of the requested signatures." in err assert "** 1 total signatures (for 1 names) cannot be built." in err def test_fromfile_dna_and_protein_missing_ignore(runtmp): # test what happens when missing protein + --ignore-missing - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) - - runtmp.sourmash('sketch', 'fromfile', - 'sketch_fromfile/salmonella-missing.csv', - '-o', 'out.zip', '-p', 'protein', '--ignore-missing') + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) + + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella-missing.csv", + "-o", + "out.zip", + "-p", + "protein", + "--ignore-missing", + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -1861,7 +2072,10 @@ def test_fromfile_dna_and_protein_missing_ignore(runtmp): print(out) print(err) - assert "WARNING: fromfile entry 'GCA_903797575 Salmonella enterica' is missing a proteome" in err + assert ( + "WARNING: fromfile entry 'GCA_903797575 Salmonella enterica' is missing a proteome" + in err + ) assert "** ERROR: we cannot build some of the requested signatures." in err assert "** 1 total signatures (for 1 names) cannot be built." 
in err @@ -1872,21 +2086,35 @@ def test_fromfile_dna_and_protein_missing_ignore(runtmp): def test_fromfile_no_overwrite(runtmp): # test --force-output-already-exists - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) - - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip', '-p', 'dna') + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) + + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-o", + "out.zip", + "-p", + "dna", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert os.path.exists(runtmp.output('out.zip')) + assert os.path.exists(runtmp.output("out.zip")) # now run again; will fail since already exists - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip', '-p', 'protein') + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-o", + "out.zip", + "-p", + "protein", + ) err = runtmp.last_result.err @@ -1896,55 +2124,81 @@ def test_fromfile_no_overwrite(runtmp): def test_fromfile_force_overwrite(runtmp): # test --force-output-already-exists - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) - - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip', '-p', 'dna') + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) + + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-o", + "out.zip", + "-p", + "dna", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert os.path.exists(runtmp.output('out.zip')) + assert os.path.exists(runtmp.output("out.zip")) # now run again, with --force - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip', '-p', 'protein', '--force-output') + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-o", + "out.zip", + "-p", + "protein", + "--force-output", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert os.path.exists(runtmp.output('out.zip')) - idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + assert os.path.exists(runtmp.output("out.zip")) + idx = sourmash.load_file_as_index(runtmp.output("out.zip")) siglist = list(idx.signatures()) assert len(siglist) == 2 - names = list(set([ ss.name for ss in siglist ])) - assert names[0] == 'GCA_903797575 Salmonella enterica' + names = list(set([ss.name for ss in siglist])) + assert names[0] == "GCA_903797575 Salmonella enterica" assert "** 1 total requested; output 1, skipped 0" in runtmp.last_result.err def test_fromfile_need_params(runtmp): # check that we need a -p - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip') + runtmp.sourmash( + "sketch", "fromfile", "sketch_fromfile/salmonella.csv", "-o", "out.zip" + ) print(str(exc)) - assert "Error creating signatures: No default moltype and none specified in param string" in str(exc) 
+ assert ( + "Error creating signatures: No default moltype and none specified in param string" + in str(exc) + ) def test_fromfile_seed_not_allowed(runtmp): # check that we cannot adjust 'seed' - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip', '-p', 'dna,seed=43') + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-o", + "out.zip", + "-p", + "dna,seed=43", + ) print(str(exc)) assert "ERROR: cannot set 'seed' in 'sketch fromfile'" in str(exc) @@ -1952,32 +2206,49 @@ def test_fromfile_seed_not_allowed(runtmp): def test_fromfile_license_not_allowed(runtmp): # check that license is CC0 - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-o', 'out.zip', '-p', 'dna', - '--license', 'BSD') + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-o", + "out.zip", + "-p", + "dna", + "--license", + "BSD", + ) print(str(exc)) - assert 'sourmash only supports CC0-licensed signatures' in str(exc) + assert "sourmash only supports CC0-licensed signatures" in str(exc) def test_fromfile_dna_and_protein_csv_output(runtmp): # does it run and produce DNA _and_ protein signatures? - test_inp = utils.get_test_data('sketch_fromfile') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) - - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '--output-csv', 'out.csv', '-p', 'dna', '-p', 'protein') + test_inp = utils.get_test_data("sketch_fromfile") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) + + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "--output-csv", + "out.csv", + "-p", + "dna", + "-p", + "protein", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) - assert os.path.exists(runtmp.output('out.csv')) + assert os.path.exists(runtmp.output("out.csv")) - with open(runtmp.output('out.csv'), newline='') as fp: + with open(runtmp.output("out.csv"), newline="") as fp: r = csv.DictReader(fp) # filename,sketchtype,output_index,name,param_strs @@ -1985,88 +2256,120 @@ def test_fromfile_dna_and_protein_csv_output(runtmp): for row in r: x.append(row) - x.sort(key=lambda x: x['filename']) + x.sort(key=lambda x: x["filename"]) assert len(x) == 2 - assert x[0]['sketchtype'] == 'dna' - assert x[0]['param_strs'] == '-p dna,k=31,scaled=1000' - assert x[0]['filename'] == 'sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz' - - assert x[1]['sketchtype'] == 'protein' - assert x[1]['param_strs'] == '-p protein,k=10,scaled=200' - assert x[1]['filename'] == 'sketch_fromfile/GCA_903797575.1_PARATYPHIC668_protein.faa.gz' + assert x[0]["sketchtype"] == "dna" + assert x[0]["param_strs"] == "-p dna,k=31,scaled=1000" + assert ( + x[0]["filename"] + == "sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz" + ) + + assert x[1]["sketchtype"] == "protein" + assert x[1]["param_strs"] == "-p protein,k=10,scaled=200" + assert ( + x[1]["filename"] + == 
"sketch_fromfile/GCA_903797575.1_PARATYPHIC668_protein.faa.gz" + ) # same name... - assert x[0]['name'] == x[1]['name'] == "GCA_903797575 Salmonella enterica" + assert x[0]["name"] == x[1]["name"] == "GCA_903797575 Salmonella enterica" # ...different output index. - assert x[1]['output_index'] != x[0]['output_index'] + assert x[1]["output_index"] != x[0]["output_index"] def test_fromfile_dna_and_protein_already_exists(runtmp): # does it properly ignore existing (--already-done) sigs? - test_inp = utils.get_test_data('sketch_fromfile') - already_done = utils.get_test_data('sketch_fromfile/salmonella-dna-protein.zip') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) - - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-p', 'dna', '-p', 'protein', - '--already-done', already_done, - '--output-manifest', 'matching.csv') + test_inp = utils.get_test_data("sketch_fromfile") + already_done = utils.get_test_data("sketch_fromfile/salmonella-dna-protein.zip") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) + + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-p", + "dna", + "-p", + "protein", + "--already-done", + already_done, + "--output-manifest", + "matching.csv", + ) print(runtmp.last_result.out) err = runtmp.last_result.err print(err) - assert 'Loaded 1 pre-existing names from manifest(s)' in err - assert 'Read 1 rows, requesting that 2 signatures be built.' in err - assert '** 0 new signatures to build from 0 files;' in err - assert '** Nothing to build. Exiting!' in err + assert "Loaded 1 pre-existing names from manifest(s)" in err + assert "Read 1 rows, requesting that 2 signatures be built." in err + assert "** 0 new signatures to build from 0 files;" in err + assert "** Nothing to build. Exiting!" in err - assert "output 2 already-done signatures to 'matching.csv' in manifest format." in err - mf = manifest.CollectionManifest.load_from_filename(runtmp.output('matching.csv')) + assert ( + "output 2 already-done signatures to 'matching.csv' in manifest format." in err + ) + mf = manifest.CollectionManifest.load_from_filename(runtmp.output("matching.csv")) assert len(mf) == 2 def test_fromfile_dna_and_protein_partly_already_exists(runtmp): # does it properly ignore existing (--already-done) sigs? - test_inp = utils.get_test_data('sketch_fromfile') - already_done = utils.get_test_data('sketch_fromfile/salmonella-dna-protein.zip') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) - - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella-mult.csv', - '-p', 'dna', '-p', 'protein', - '--already-done', already_done) + test_inp = utils.get_test_data("sketch_fromfile") + already_done = utils.get_test_data("sketch_fromfile/salmonella-dna-protein.zip") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) + + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella-mult.csv", + "-p", + "dna", + "-p", + "protein", + "--already-done", + already_done, + ) print(runtmp.last_result.out) err = runtmp.last_result.err print(err) - assert 'Loaded 1 pre-existing names from manifest(s)' in err - assert 'Read 2 rows, requesting that 4 signatures be built.' in err - assert '** 2 new signatures to build from 2 files;' in err + assert "Loaded 1 pre-existing names from manifest(s)" in err + assert "Read 2 rows, requesting that 4 signatures be built." in err + assert "** 2 new signatures to build from 2 files;" in err assert "** 2 already exist, so skipping those." 
in err assert "** 4 total requested; output 2, skipped 2" in err def test_fromfile_dna_and_protein_already_exists_noname(runtmp): # check that no name in already_exists is handled - test_inp = utils.get_test_data('sketch_fromfile') - already_done = utils.get_test_data('sketch_fromfile/salmonella-dna-protein.zip') - shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + test_inp = utils.get_test_data("sketch_fromfile") + already_done = utils.get_test_data("sketch_fromfile/salmonella-dna-protein.zip") + shutil.copytree(test_inp, runtmp.output("sketch_fromfile")) # run rename to get rid of names - runtmp.sourmash('sig', 'rename', already_done, '', - '-o', 'already-done.zip') - - runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', - '-p', 'dna', '-p', 'protein', - '--already-done', 'already-done.zip') + runtmp.sourmash("sig", "rename", already_done, "", "-o", "already-done.zip") + + runtmp.sourmash( + "sketch", + "fromfile", + "sketch_fromfile/salmonella.csv", + "-p", + "dna", + "-p", + "protein", + "--already-done", + "already-done.zip", + ) print(runtmp.last_result.out) err = runtmp.last_result.err print(err) - assert 'Loaded 0 pre-existing names from manifest(s)' in err - assert 'Read 1 rows, requesting that 2 signatures be built.' in err - assert '** 2 new signatures to build from 2 files;' in err - assert '** 2 total requested; output 2, skipped 0' in err + assert "Loaded 0 pre-existing names from manifest(s)" in err + assert "Read 1 rows, requesting that 2 signatures be built." in err + assert "** 2 new signatures to build from 2 files;" in err + assert "** 2 total requested; output 2, skipped 0" in err diff --git a/tests/test_sqlite_index.py b/tests/test_sqlite_index.py index 74c4692c06..816719e602 100644 --- a/tests/test_sqlite_index.py +++ b/tests/test_sqlite_index.py @@ -6,9 +6,12 @@ import sourmash from sourmash.exceptions import IndexNotSupported -from sourmash.index.sqlite_index import (SqliteIndex, load_sqlite_index, - SqliteCollectionManifest, - LCA_SqliteDatabase) +from sourmash.index.sqlite_index import ( + SqliteIndex, + load_sqlite_index, + SqliteCollectionManifest, + LCA_SqliteDatabase, +) from sourmash.index import StandaloneManifestIndex from sourmash import load_one_signature, SourmashSignature @@ -23,7 +26,7 @@ def test_sqlite_index_prefetch_empty(): # check that an exception is raised upon for an empty database - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) sqlidx = SqliteIndex.create(":memory:") @@ -41,26 +44,27 @@ def test_sqlite_index_bad_version(runtmp): # create a sqlite database with a bad index version in the # sourmash_internal table, see what happens :) - dbfile = runtmp.output('xyz.sqldb') + dbfile = runtmp.output("xyz.sqldb") conn = sqlite3.connect(dbfile) c = conn.cursor() SqliteIndex._create_tables(c) # 0.9 doesn't exist/is bad version - c.execute('UPDATE sourmash_internal SET value=? WHERE key=?', - ('0.9', 'SqliteIndex')) + c.execute( + "UPDATE sourmash_internal SET value=? 
WHERE key=?", ("0.9", "SqliteIndex") + ) conn.commit() with pytest.raises(IndexNotSupported): - idx = sourmash.load_file_as_index(dbfile) + sourmash.load_file_as_index(dbfile) def test_sqlite_index_bad_version_unique(runtmp): # try to insert duplicate sqlite index info into sourmash_internal; fail - dbfile = runtmp.output('xyz.sqldb') + dbfile = runtmp.output("xyz.sqldb") conn = sqlite3.connect(dbfile) c = conn.cursor() @@ -68,13 +72,17 @@ def test_sqlite_index_bad_version_unique(runtmp): # can't insert duplicate key with pytest.raises(sqlite3.IntegrityError): - c.execute('INSERT INTO sourmash_internal (value, key) VALUES (?, ?)', - ('1.1', 'SqliteIndex')) + c.execute( + "INSERT INTO sourmash_internal (value, key) VALUES (?, ?)", + ("1.1", "SqliteIndex"), + ) def test_index_search_subj_scaled_is_lower(): # check that subject sketches are appropriately downsampled - sigfile = utils.get_test_data('scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz') + sigfile = utils.get_test_data( + "scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz" + ) ss = sourmash.load_one_signature(sigfile) # double check :) @@ -95,15 +103,15 @@ def test_index_search_subj_scaled_is_lower(): def test_sqlite_index_save_load(runtmp): - sig2 = utils.get_test_data('2.fa.sig') - sig47 = utils.get_test_data('47.fa.sig') - sig63 = utils.get_test_data('63.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") + sig47 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") ss2 = sourmash.load_one_signature(sig2, ksize=31) ss47 = sourmash.load_one_signature(sig47) ss63 = sourmash.load_one_signature(sig63) - filename = runtmp.output('foo') + filename = runtmp.output("foo") sqlidx = SqliteIndex.create(filename) sqlidx.insert(ss2) sqlidx.insert(ss47) @@ -122,7 +130,7 @@ def test_sqlite_index_save_load(runtmp): def test_sqlite_index_multik_select(): # this loads three ksizes, 21/31/51 - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") siglist = sourmash.load_file_as_signatures(sig2) sqlidx = SqliteIndex.create(":memory:") @@ -130,11 +138,11 @@ def test_sqlite_index_multik_select(): sqlidx.insert(ss) # select most specifically - sqlidx2 = sqlidx.select(ksize=31, moltype='DNA') + sqlidx2 = sqlidx.select(ksize=31, moltype="DNA") assert len(sqlidx2) == 1 # all are DNA: - sqlidx2 = sqlidx.select(moltype='DNA') + sqlidx2 = sqlidx.select(moltype="DNA") assert len(sqlidx2) == 3 @@ -156,7 +164,7 @@ def test_sqlite_index_insert_num_fail(): # cannot insert 'num' signatures sqlidx = SqliteIndex.create(":memory:") - sig47 = utils.get_test_data('num/47.fa.sig') + sig47 = utils.get_test_data("num/47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) assert ss47.minhash.num != 0 @@ -170,7 +178,7 @@ def test_sqlite_index_insert_abund_fail(): # cannot insert 'num' signatures sqlidx = SqliteIndex.create(":memory:") - sig47 = utils.get_test_data('track_abund/47.fa.sig') + sig47 = utils.get_test_data("track_abund/47.fa.sig") ss47 = sourmash.load_one_signature(sig47, ksize=31) with pytest.raises(ValueError) as exc: @@ -183,7 +191,7 @@ def test_sqlite_index_moltype_multi_fail(): # check that we cannot store sigs with multiple scaled values. 
# this loads multiple ksizes (19, 31) and moltypes (DNA, protein, hp, etc) - filename = utils.get_test_data('prot/all.zip') + filename = utils.get_test_data("prot/all.zip") siglist = sourmash.load_file_as_signatures(filename) siglist = list(siglist) @@ -203,7 +211,7 @@ def test_sqlite_index_picklist_select(): # test select with a picklist # this loads three ksizes, 21/31/51 - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") siglist = sourmash.load_file_as_signatures(sig2) sqlidx = SqliteIndex.create(":memory:") @@ -211,22 +219,22 @@ def test_sqlite_index_picklist_select(): sqlidx.insert(ss) # construct a picklist... - picklist = SignaturePicklist('md5prefix8') - picklist.init(['f3a90d4e']) + picklist = SignaturePicklist("md5prefix8") + picklist.init(["f3a90d4e"]) # select on picklist sqlidx2 = sqlidx.select(picklist=picklist) assert len(sqlidx2) == 1 ss = list(sqlidx2.signatures())[0] assert ss.minhash.ksize == 31 - assert ss.md5sum().startswith('f3a90d4e55') + assert ss.md5sum().startswith("f3a90d4e55") def test_sqlite_index_picklist_select_exclude(): # test select with a picklist, but exclude # this loads three ksizes, 21/31/51 - sig2 = utils.get_test_data('2.fa.sig') + sig2 = utils.get_test_data("2.fa.sig") siglist = sourmash.load_file_as_signatures(sig2) sqlidx = SqliteIndex.create(":memory:") @@ -234,8 +242,8 @@ def test_sqlite_index_picklist_select_exclude(): sqlidx.insert(ss) # construct a picklist... - picklist = SignaturePicklist('md5prefix8', pickstyle=PickStyle.EXCLUDE) - picklist.init(['f3a90d4e']) + picklist = SignaturePicklist("md5prefix8", pickstyle=PickStyle.EXCLUDE) + picklist.init(["f3a90d4e"]) # select on picklist sqlidx2 = sqlidx.select(picklist=picklist) @@ -245,8 +253,10 @@ def test_sqlite_index_picklist_select_exclude(): for ss in list(sqlidx2.signatures()): md5s.add(ss.md5sum()) ksizes.add(ss.minhash.ksize) - assert md5s == set(['f372e47893edd349e5956f8b0d8dcbf7','43f3b48e59443092850964d355a20ac0']) - assert ksizes == set([21,51]) + assert md5s == set( + ["f372e47893edd349e5956f8b0d8dcbf7", "43f3b48e59443092850964d355a20ac0"] + ) + assert ksizes == set([21, 51]) def test_sqlite_jaccard_ordering(): @@ -265,10 +275,10 @@ def test_sqlite_jaccard_ordering(): def _intersect(x, y): return x.intersection_and_union_size(y)[0] - print('a intersect b:', _intersect(a, b)) - print('a intersect c:', _intersect(a, c)) - print('a jaccard b:', a.jaccard(b)) - print('a jaccard c:', a.jaccard(c)) + print("a intersect b:", _intersect(a, b)) + print("a intersect c:", _intersect(a, c)) + print("a jaccard b:", a.jaccard(b)) + print("a jaccard c:", a.jaccard(c)) assert _intersect(a, b) > _intersect(a, c) assert a.jaccard(b) < a.jaccard(c) @@ -277,9 +287,9 @@ def _intersect(x, y): assert a.jaccard(c) > 0.15 # now - make signatures, try out :) - ss_a = sourmash.SourmashSignature(a, name='A') - ss_b = sourmash.SourmashSignature(b, name='B') - ss_c = sourmash.SourmashSignature(c, name='C') + ss_a = sourmash.SourmashSignature(a, name="A") + ss_b = sourmash.SourmashSignature(b, name="B") + ss_c = sourmash.SourmashSignature(c, name="C") sqlidx = SqliteIndex.create(":memory:") sqlidx.insert(ss_a) @@ -303,7 +313,7 @@ def test_sqlite_index_scaled1(): mh1.add_hash(2**64 - 1) mh1.add_hash(2**64 - 2) mh1.add_hash(2**64 - 3) - ss1 = sourmash.SourmashSignature(mh1, name='ss 1') + ss1 = sourmash.SourmashSignature(mh1, name="ss 1") mh2 = sourmash.MinHash(0, 31, scaled=1) mh2.add_hash(2**64 - 1) @@ -312,7 +322,7 @@ def test_sqlite_index_scaled1(): mh2.add_hash(0) 
mh2.add_hash(1) mh2.add_hash(2) - ss2 = sourmash.SourmashSignature(mh2, name='ss 2') + ss2 = sourmash.SourmashSignature(mh2, name="ss 2") sqlidx.insert(ss1) sqlidx.insert(ss2) @@ -340,7 +350,7 @@ def test_sqlite_index_scaled1(): def test_sqlite_index_load_existing(): # try loading an existing sqlite index - filename = utils.get_test_data('sqlite/index.sqldb') + filename = utils.get_test_data("sqlite/index.sqldb") sqlidx = sourmash.load_file_as_index(filename) assert isinstance(sqlidx, SqliteIndex) @@ -350,11 +360,11 @@ def test_sqlite_index_load_existing(): def test_sqlite_index_create_load_existing(runtmp): # try creating then loading an existing sqlite index; create from CLI - filename = runtmp.output('idx.sqldb') - sig1 = utils.get_test_data('47.fa.sig') - sig2 = utils.get_test_data('63.fa.sig') + filename = runtmp.output("idx.sqldb") + sig1 = utils.get_test_data("47.fa.sig") + sig2 = utils.get_test_data("63.fa.sig") - runtmp.sourmash('sig', 'cat', sig1, sig2, '-o', filename) + runtmp.sourmash("sig", "cat", sig1, sig2, "-o", filename) sqlidx = sourmash.load_file_as_index(filename) assert isinstance(sqlidx, SqliteIndex) @@ -365,12 +375,12 @@ def test_sqlite_index_create_load_existing(runtmp): def test_sqlite_index_create_load_insert_existing(runtmp): # try creating, loading, inserting into an existing sqlite index - filename = runtmp.output('idx.sqldb') - sig1 = utils.get_test_data('47.fa.sig') - sig2 = utils.get_test_data('63.fa.sig') - sig3 = utils.get_test_data('2.fa.sig') + filename = runtmp.output("idx.sqldb") + sig1 = utils.get_test_data("47.fa.sig") + sig2 = utils.get_test_data("63.fa.sig") + sig3 = utils.get_test_data("2.fa.sig") - runtmp.sourmash('sig', 'cat', sig1, sig2, '-o', filename) + runtmp.sourmash("sig", "cat", sig1, sig2, "-o", filename) sqlidx = sourmash.load_file_as_index(filename) assert isinstance(sqlidx, SqliteIndex) @@ -382,7 +392,7 @@ def test_sqlite_index_create_load_insert_existing(runtmp): sqlidx.insert(ss3) sqlidx.commit() - runtmp.sourmash('sig', 'describe', filename) + runtmp.sourmash("sig", "describe", filename) print(runtmp.last_result.out) assert "md5: f3a90d4e5528864a5bcc8434b0d0c3b1" in runtmp.last_result.out @@ -390,12 +400,12 @@ def test_sqlite_index_create_load_insert_existing(runtmp): def test_sqlite_index_create_load_insert_existing_cli(runtmp): # try creating, loading, inserting into an existing sqlite index from cli # (aka "append" to existing database) - filename = runtmp.output('idx.sqldb') - sig1 = utils.get_test_data('47.fa.sig') - sig2 = utils.get_test_data('63.fa.sig') - sig3 = utils.get_test_data('2.fa.sig') + filename = runtmp.output("idx.sqldb") + sig1 = utils.get_test_data("47.fa.sig") + sig2 = utils.get_test_data("63.fa.sig") + sig3 = utils.get_test_data("2.fa.sig") - runtmp.sourmash('sig', 'cat', sig1, sig2, '-o', filename) + runtmp.sourmash("sig", "cat", sig1, sig2, "-o", filename) sqlidx = sourmash.load_file_as_index(filename) assert isinstance(sqlidx, SqliteIndex) @@ -404,7 +414,7 @@ def test_sqlite_index_create_load_insert_existing_cli(runtmp): assert len(siglist) == 2 # add a third - runtmp.sourmash('sig', 'cat', sig3, '-o', filename, '-k', '31') + runtmp.sourmash("sig", "cat", sig3, "-o", filename, "-k", "31") siglist = list(sqlidx.signatures()) assert len(siglist) == 3 @@ -414,26 +424,27 @@ def test_sqlite_manifest_bad_version(runtmp): # create a sqlite database with a bad manifest version in the # sourmash_internal table, see what happens :) - dbfile = runtmp.output('xyz.sqlmf') + dbfile = runtmp.output("xyz.sqlmf") conn = 
sqlite3.connect(dbfile) c = conn.cursor() SqliteCollectionManifest._create_tables(c) # 0.9 doesn't exist/bad version - c.execute('UPDATE sourmash_internal SET value=? WHERE key=?', - ('0.9', 'SqliteManifest')) + c.execute( + "UPDATE sourmash_internal SET value=? WHERE key=?", ("0.9", "SqliteManifest") + ) conn.commit() with pytest.raises(IndexNotSupported): - mf = CollectionManifest.load_from_filename(dbfile) + CollectionManifest.load_from_filename(dbfile) def test_sqlite_manifest_bad_version_unique(runtmp): # try to insert duplicate sqlite manifest info into sourmash_internal; fail - dbfile = runtmp.output('xyz.sqldb') + dbfile = runtmp.output("xyz.sqldb") conn = sqlite3.connect(dbfile) c = conn.cursor() @@ -441,15 +452,17 @@ def test_sqlite_manifest_bad_version_unique(runtmp): # can't insert duplicate key with pytest.raises(sqlite3.IntegrityError): - c.execute('INSERT INTO sourmash_internal (value, key) VALUES (?, ?)', - ('1.1', 'SqliteManifest')) + c.execute( + "INSERT INTO sourmash_internal (value, key) VALUES (?, ?)", + ("1.1", "SqliteManifest"), + ) def test_sqlite_manifest_basic(): # test some features of the SQLite-based manifest. - sig2 = load_one_signature(utils.get_test_data('2.fa.sig'), ksize=31) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig'), ksize=31) - sig63 = load_one_signature(utils.get_test_data('63.fa.sig'), ksize=31) + sig2 = load_one_signature(utils.get_test_data("2.fa.sig"), ksize=31) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) sqlidx = SqliteIndex.create(":memory:") @@ -479,15 +492,17 @@ def test_sqlite_manifest_basic(): def test_sqlite_manifest_round_trip(): # check that we can go from regular mf -> sqlite mf -> regular again. 
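[Editorial aside on the picklist tests earlier in this file, not part of the patch: a SignaturePicklist restricts select() to matching sketches, and PickStyle.EXCLUDE inverts the match. A minimal self-contained sketch; the sourmash.picklist import path is an assumption here, since the test module's import block is not part of these hunks.]

    import sourmash
    from sourmash.index.sqlite_index import SqliteIndex
    from sourmash.picklist import PickStyle, SignaturePicklist  # assumed path

    sqlidx = SqliteIndex.create(":memory:")
    for ss in sourmash.load_file_as_signatures(utils.get_test_data("2.fa.sig")):
        sqlidx.insert(ss)

    # keep only the sketch whose md5 starts with this 8-char prefix (ksize 31)...
    include = SignaturePicklist("md5prefix8")
    include.init(["f3a90d4e"])
    assert len(sqlidx.select(picklist=include)) == 1

    # ...or keep everything else (the ksize 21 and 51 sketches).
    exclude = SignaturePicklist("md5prefix8", pickstyle=PickStyle.EXCLUDE)
    exclude.init(["f3a90d4e"])
    assert len(sqlidx.select(picklist=exclude)) == 2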
- sig2 = load_one_signature(utils.get_test_data('2.fa.sig'), ksize=31) - sig47 = load_one_signature(utils.get_test_data('47.fa.sig'), ksize=31) - sig63 = load_one_signature(utils.get_test_data('63.fa.sig'), ksize=31) + sig2 = load_one_signature(utils.get_test_data("2.fa.sig"), ksize=31) + sig47 = load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + sig63 = load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) rows = [] - rows.append(CollectionManifest.make_manifest_row(sig47, None, - include_signature=False)) - rows.append(CollectionManifest.make_manifest_row(sig63, None, - include_signature=False)) + rows.append( + CollectionManifest.make_manifest_row(sig47, None, include_signature=False) + ) + rows.append( + CollectionManifest.make_manifest_row(sig63, None, include_signature=False) + ) nosql_mf = CollectionManifest(rows) sqlite_mf = SqliteCollectionManifest.load_from_manifest(nosql_mf) @@ -507,13 +522,12 @@ def test_sqlite_manifest_round_trip(): def test_sqlite_manifest_create(runtmp): # test creation and summarization of a manifest of prot.zip - zipfile = utils.get_test_data('prot/all.zip') + zipfile = utils.get_test_data("prot/all.zip") # create manifest - runtmp.sourmash('sig', 'manifest', '-F', 'sql', zipfile, - '-o', 'mf.sqlmf') + runtmp.sourmash("sig", "manifest", "-F", "sql", zipfile, "-o", "mf.sqlmf") - sqlmf = runtmp.output('mf.sqlmf') + sqlmf = runtmp.output("mf.sqlmf") assert os.path.exists(sqlmf) # verify it's loadable as the right type @@ -521,7 +535,7 @@ def test_sqlite_manifest_create(runtmp): assert isinstance(idx, StandaloneManifestIndex) # summarize - runtmp.sourmash('sig', 'fileinfo', 'mf.sqlmf') + runtmp.sourmash("sig", "fileinfo", "mf.sqlmf") out = runtmp.last_result.out print(out) @@ -540,41 +554,38 @@ def test_sqlite_manifest_create(runtmp): def test_sqlite_manifest_create_noload_sigs(runtmp): # sigs should not be loadable from manifest this way... 
- zipfile = utils.get_test_data('prot/all.zip') + zipfile = utils.get_test_data("prot/all.zip") # create manifest - runtmp.sourmash('sig', 'manifest', '-F', 'sql', zipfile, - '-o', 'mf.sqlmf') + runtmp.sourmash("sig", "manifest", "-F", "sql", zipfile, "-o", "mf.sqlmf") # 'describe' should not be able to load the sqlmf b/c prefix is wrong with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('sig', 'describe', 'mf.sqlmf') + runtmp.sourmash("sig", "describe", "mf.sqlmf") def test_sqlite_manifest_create_yesload_sigs(runtmp): # should be able to load after copying files - zipfile = utils.get_test_data('prot/all.zip') - shutil.copytree(utils.get_test_data('prot'), runtmp.output('prot')) + zipfile = utils.get_test_data("prot/all.zip") + shutil.copytree(utils.get_test_data("prot"), runtmp.output("prot")) # create manifest - runtmp.sourmash('sig', 'manifest', '-F', 'sql', zipfile, - '-o', 'prot/mf.sqlmf') + runtmp.sourmash("sig", "manifest", "-F", "sql", zipfile, "-o", "prot/mf.sqlmf") # 'describe' should now be able to load the sqlmf, which is cool - runtmp.sourmash('sig', 'describe', 'prot/mf.sqlmf') + runtmp.sourmash("sig", "describe", "prot/mf.sqlmf") print(runtmp.last_result.out) def test_sqlite_manifest_num(runtmp): # should be able to produce sql manifests with 'num' sketches in them - numsig = utils.get_test_data('num/47.fa.sig') + numsig = utils.get_test_data("num/47.fa.sig") # create mf - runtmp.sourmash('sig', 'manifest', '-F', 'sql', numsig, - '-o', 'mf.sqlmf') + runtmp.sourmash("sig", "manifest", "-F", "sql", numsig, "-o", "mf.sqlmf") # do summarize: - runtmp.sourmash('sig', 'summarize', 'mf.sqlmf') + runtmp.sourmash("sig", "summarize", "mf.sqlmf") out = runtmp.last_result.out print(out) @@ -586,14 +597,13 @@ def test_sqlite_manifest_num(runtmp): def test_sqlite_manifest_num_select(runtmp): # should be able to _select_ sql manifests with 'num' sketches in them - numsig = utils.get_test_data('num/47.fa.sig') + numsig = utils.get_test_data("num/47.fa.sig") # create mf - runtmp.sourmash('sig', 'manifest', '-F', 'sql', numsig, - '-o', 'mf.sqlmf') + runtmp.sourmash("sig", "manifest", "-F", "sql", numsig, "-o", "mf.sqlmf") # load as index - idx = sourmash.load_file_as_index(runtmp.output('mf.sqlmf')) + idx = sourmash.load_file_as_index(runtmp.output("mf.sqlmf")) # select print(list(idx.manifest.rows)) @@ -604,25 +614,24 @@ def test_sqlite_manifest_num_select(runtmp): def test_sqlite_manifest_locations(runtmp): # check what locations returns... may return too many, that's ok. - prot = utils.get_test_data('prot') + prot = utils.get_test_data("prot") - runtmp.sourmash('sig', 'manifest', '-F', 'sql', prot, - '-o', 'mf.sqlmf') + runtmp.sourmash("sig", "manifest", "-F", "sql", prot, "-o", "mf.sqlmf") # load as index - idx = sourmash.load_file_as_index(runtmp.output('mf.sqlmf')) + idx = sourmash.load_file_as_index(runtmp.output("mf.sqlmf")) - picklist = SignaturePicklist('identprefix') - picklist.pickset = set(['GCA_001593925']) + picklist = SignaturePicklist("identprefix") + picklist.pickset = set(["GCA_001593925"]) idx = idx.select(picklist=picklist) sql_locations = set(idx.manifest.locations()) - row_locations = set(row['internal_location'] for row in idx.manifest.rows) + row_locations = set(row["internal_location"] for row in idx.manifest.rows) assert sql_locations.issuperset(row_locations) - assert 'dna-sig.sig.gz' in sql_locations # this is unnecessary... 
- assert 'dna-sig.sig.gz' not in row_locations # ...this is correct :) + assert "dna-sig.sig.gz" in sql_locations # this is unnecessary... + assert "dna-sig.sig.gz" not in row_locations # ...this is correct :) def test_sqlite_manifest_create_insert(runtmp): @@ -631,126 +640,125 @@ def test_sqlite_manifest_create_insert(runtmp): mfname = runtmp.output("some.sqlmf") mf = SqliteCollectionManifest.create(mfname) - sigfile = utils.get_test_data('47.fa.sig') + sigfile = utils.get_test_data("47.fa.sig") ss = sourmash.load_one_signature(sigfile) - mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, 'some.sig')) + mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, "some.sig")) mf.conn.commit() # copy sig in since we want it to resolve... - shutil.copyfile(sigfile, runtmp.output('some.sig')) + shutil.copyfile(sigfile, runtmp.output("some.sig")) # 'describe' should work here, to resolve actual sigs. - runtmp.sourmash('sig', 'describe', mfname) + runtmp.sourmash("sig", "describe", mfname) print(runtmp.last_result.out) - assert 'md5: 09a08691ce52952152f0e866a59f6261' in runtmp.last_result.out + assert "md5: 09a08691ce52952152f0e866a59f6261" in runtmp.last_result.out def test_sqlite_manifest_create_insert_2(runtmp): # try out creating a sqlite manifest from cli and then _insert_row into it # copy sig in since we want it to resolve... - sigfile = utils.get_test_data('47.fa.sig') - shutil.copyfile(sigfile, runtmp.output('some.sig')) + sigfile = utils.get_test_data("47.fa.sig") + shutil.copyfile(sigfile, runtmp.output("some.sig")) - runtmp.sourmash('sig', 'manifest', 'some.sig', '-F', 'sql', - '-o', 'some.sqlmf') + runtmp.sourmash("sig", "manifest", "some.sig", "-F", "sql", "-o", "some.sqlmf") mfname = runtmp.output("some.sqlmf") mf = CollectionManifest.load_from_filename(mfname) - ss = sourmash.load_one_signature(runtmp.output('some.sig')) - mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, 'some.sig')) + ss = sourmash.load_one_signature(runtmp.output("some.sig")) + mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, "some.sig")) mf.conn.commit() # 'describe' should work here, to resolve actual sigs. 
- runtmp.sourmash('sig', 'describe', mfname) + runtmp.sourmash("sig", "describe", mfname) print(runtmp.last_result.out) - assert 'md5: 09a08691ce52952152f0e866a59f6261' in runtmp.last_result.out + assert "md5: 09a08691ce52952152f0e866a59f6261" in runtmp.last_result.out def test_sqlite_manifest_existing(runtmp): # try out an existing sqlite manifest - prefix = runtmp.output('protdir') - mf = runtmp.output('protdir/prot.sqlmf') - shutil.copytree(utils.get_test_data('prot'), prefix) - shutil.copyfile(utils.get_test_data('sqlite/prot.sqlmf'), mf) + prefix = runtmp.output("protdir") + mf = runtmp.output("protdir/prot.sqlmf") + shutil.copytree(utils.get_test_data("prot"), prefix) + shutil.copyfile(utils.get_test_data("sqlite/prot.sqlmf"), mf) - runtmp.sourmash('sig', 'describe', mf) + runtmp.sourmash("sig", "describe", mf) print(runtmp.last_result.out) def test_sqlite_manifest_existing_insert(runtmp): # try out an existing sqlite manifest - insert into it - prefix = runtmp.output('protdir') - shutil.copytree(utils.get_test_data('prot'), prefix) + prefix = runtmp.output("protdir") + shutil.copytree(utils.get_test_data("prot"), prefix) - mfname = runtmp.output('protdir/prot.sqlmf') - shutil.copyfile(utils.get_test_data('sqlite/prot.sqlmf'), mfname) + mfname = runtmp.output("protdir/prot.sqlmf") + shutil.copyfile(utils.get_test_data("sqlite/prot.sqlmf"), mfname) mf = CollectionManifest.load_from_filename(mfname) assert isinstance(mf, SqliteCollectionManifest) - sigfile = utils.get_test_data('47.fa.sig') + sigfile = utils.get_test_data("47.fa.sig") ss = sourmash.load_one_signature(sigfile) - mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, 'some.sig')) + mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, "some.sig")) mf.conn.commit() # copy sig in since we want it to resolve... - shutil.copyfile(sigfile, runtmp.output('protdir/some.sig')) + shutil.copyfile(sigfile, runtmp.output("protdir/some.sig")) # 'describe' should work here. - runtmp.sourmash('sig', 'describe', mfname) + runtmp.sourmash("sig", "describe", mfname) print(runtmp.last_result.out) def test_sqlite_manifest_existing_mf_only(runtmp): # try out an existing sqlite manifest, but without underlying files -> fail - mf = runtmp.output('prot.sqlmf') - shutil.copyfile(utils.get_test_data('sqlite/prot.sqlmf'), mf) + mf = runtmp.output("prot.sqlmf") + shutil.copyfile(utils.get_test_data("sqlite/prot.sqlmf"), mf) # 'fileinfo' should work... - runtmp.sourmash('sig', 'fileinfo', mf) + runtmp.sourmash("sig", "fileinfo", mf) print(runtmp.last_result.out) - assert 'num signatures: 7' in runtmp.last_result.out + assert "num signatures: 7" in runtmp.last_result.out # ...but 'describe' should fail, since it needs actual sigs. 
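[Editorial aside, not part of the patch: several manifest tests above share a load-then-extend pattern, and 'describe' only succeeds afterwards if the inserted row's location resolves to a real file next to the manifest. A minimal sketch mirroring the calls in this diff; note _insert_row is private (the tests use it deliberately), and `runtmp`/`utils` are the test fixtures/helpers.]

    import shutil

    import sourmash
    from sourmash import manifest

    mfname = runtmp.output("some.sqlmf")
    mf = manifest.CollectionManifest.load_from_filename(mfname)

    ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"))
    mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, "some.sig"))
    mf.conn.commit()

    # the new row only resolves if 'some.sig' actually exists next to the manifest:
    shutil.copyfile(utils.get_test_data("47.fa.sig"), runtmp.output("some.sig"))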
- with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('sig', 'describe', mf) + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash("sig", "describe", mf) print(runtmp.last_result.err) - assert 'ERROR: Error while reading signatures from' in runtmp.last_result.err + assert "ERROR: Error while reading signatures from" in runtmp.last_result.err def test_sqlite_manifest_existing_mfonly_insert(runtmp): # try out an existing sqlite manifest - insert into it, but fail describe - mfname = runtmp.output('prot.sqlmf') - shutil.copyfile(utils.get_test_data('sqlite/prot.sqlmf'), mfname) + mfname = runtmp.output("prot.sqlmf") + shutil.copyfile(utils.get_test_data("sqlite/prot.sqlmf"), mfname) mf = CollectionManifest.load_from_filename(mfname) assert isinstance(mf, SqliteCollectionManifest) - sigfile = utils.get_test_data('47.fa.sig') + sigfile = utils.get_test_data("47.fa.sig") ss = sourmash.load_one_signature(sigfile) mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, sigfile)) mf.conn.commit() # 'fileinfo' should work... - runtmp.sourmash('sig', 'fileinfo', mfname) + runtmp.sourmash("sig", "fileinfo", mfname) print(runtmp.last_result.out) - assert 'num signatures: 8' in runtmp.last_result.out + assert "num signatures: 8" in runtmp.last_result.out # ...but 'describe' should fail, since it needs actual sigs. - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('sig', 'describe', mfname) + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash("sig", "describe", mfname) def test_sqlite_manifest_load_existing_index(): # try loading an existing sqlite index as a manifest - filename = utils.get_test_data('sqlite/index.sqldb') + filename = utils.get_test_data("sqlite/index.sqldb") mf = CollectionManifest.load_from_filename(filename) assert isinstance(mf, SqliteCollectionManifest) @@ -759,14 +767,14 @@ def test_sqlite_manifest_load_existing_index(): def test_sqlite_manifest_load_existing_index_insert_fail(): # try loading an existing sqlite index as a manifest; insert should fail - filename = utils.get_test_data('sqlite/index.sqldb') + filename = utils.get_test_data("sqlite/index.sqldb") mf = CollectionManifest.load_from_filename(filename) assert isinstance(mf, SqliteCollectionManifest) assert len(mf) == 2 # try insert - should fail - sigfile = utils.get_test_data('47.fa.sig') + sigfile = utils.get_test_data("47.fa.sig") ss = sourmash.load_one_signature(sigfile) with pytest.raises(Exception) as exc: @@ -787,7 +795,7 @@ def test_sqlite_manifest_create_load_empty(runtmp): def test_sqlite_lca_db_load_existing(): # try loading an existing sqlite index - filename = utils.get_test_data('sqlite/lca.sqldb') + filename = utils.get_test_data("sqlite/lca.sqldb") sqlidx = sourmash.load_file_as_index(filename) assert isinstance(sqlidx, LCA_SqliteDatabase) @@ -797,27 +805,26 @@ def test_sqlite_lca_db_load_existing(): def test_sqlite_lca_db_select(): # try loading an existing sqlite index - filename = utils.get_test_data('sqlite/lca.sqldb') + filename = utils.get_test_data("sqlite/lca.sqldb") sqlidx = sourmash.load_file_as_index(filename) assert isinstance(sqlidx, LCA_SqliteDatabase) sqlidx2 = sqlidx.select(ksize=31) - x = list(sqlidx2.hashvals) # only on LCA_SqliteDatabase + list(sqlidx2.hashvals) # only on LCA_SqliteDatabase assert isinstance(sqlidx2, LCA_SqliteDatabase) def test_sqlite_lca_db_create_load_existing(runtmp): # try creating (from CLI) then loading (from API) an LCA db - filename = runtmp.output('lca.sqldb') - sig1 = 
utils.get_test_data('lca/TARA_ASE_MAG_00031.sig') - sig2 = utils.get_test_data('lca/TARA_PSW_MAG_00136.sig') + filename = runtmp.output("lca.sqldb") + sig1 = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") + sig2 = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig") - runtmp.sourmash('sig', 'flatten', sig1, sig2, '-o', filename, '-k', '31') + runtmp.sourmash("sig", "flatten", sig1, sig2, "-o", filename, "-k", "31") # load tax - tax_csv = utils.get_test_data('sqlite/delmont-6.csv') - runtmp.sourmash('tax', 'prepare', '-t', tax_csv, - '-o', filename, '-F', 'sql') + tax_csv = utils.get_test_data("sqlite/delmont-6.csv") + runtmp.sourmash("tax", "prepare", "-t", tax_csv, "-o", filename, "-F", "sql") sqlidx = sourmash.load_file_as_index(filename) assert isinstance(sqlidx, LCA_SqliteDatabase) @@ -829,63 +836,62 @@ def test_sqlite_lca_db_create_load_existing(runtmp): def test_sqlite_lca_db_load_empty(runtmp): # try creating then loading an _empty_ LCA_SqliteDatabase - dbname = runtmp.output('empty.sqldb') + dbname = runtmp.output("empty.sqldb") # create empty SqliteIndex... - runtmp.sourmash('sig', 'cat', '-o', dbname) + runtmp.sourmash("sig", "cat", "-o", dbname) assert os.path.exists(dbname) # ...and create empty sourmash_taxonomy tables in there... - empty_tax = utils.get_test_data('scaled/empty-lineage.csv') - runtmp.sourmash('tax', 'prepare', '-F', 'sql', '-t', empty_tax, - '-o', dbname) + empty_tax = utils.get_test_data("scaled/empty-lineage.csv") + runtmp.sourmash("tax", "prepare", "-F", "sql", "-t", empty_tax, "-o", dbname) - runtmp.sourmash('sig', 'describe', dbname) - assert 'loaded 0 signatures' in runtmp.last_result.err + runtmp.sourmash("sig", "describe", dbname) + assert "loaded 0 signatures" in runtmp.last_result.err def test_sqlite_lca_db_create_readonly(runtmp): # try running 'prepare' on a read-only sqlite db, check error message. - dbname = runtmp.output('empty.sqldb') + dbname = runtmp.output("empty.sqldb") # create empty SqliteIndex... - runtmp.sourmash('sig', 'cat', '-o', dbname) + runtmp.sourmash("sig", "cat", "-o", dbname) assert os.path.exists(dbname) # make it read only... from stat import S_IREAD, S_IRGRP, S_IROTH - os.chmod(dbname, S_IREAD|S_IRGRP|S_IROTH) + + os.chmod(dbname, S_IREAD | S_IRGRP | S_IROTH) # ...and try creating empty sourmash_taxonomy tables in there... - empty_tax = utils.get_test_data('scaled/empty-lineage.csv') + empty_tax = utils.get_test_data("scaled/empty-lineage.csv") - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.sourmash('tax', 'prepare', '-F', 'sql', '-t', empty_tax, - '-o', dbname) + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash("tax", "prepare", "-F", "sql", "-t", empty_tax, "-o", dbname) err = runtmp.last_result.err print(err) - assert not "taxonomy table already exists in" in err + assert "taxonomy table already exists in" not in err assert "attempt to write a readonly database" in err def test_sqlite_lca_db_try_load_sqlite_index(): # try loading a SqliteIndex with no tax tables from .load classmethod - dbname = utils.get_test_data('sqlite/index.sqldb') + dbname = utils.get_test_data("sqlite/index.sqldb") with pytest.raises(ValueError) as exc: - db = LCA_SqliteDatabase.load(dbname) + LCA_SqliteDatabase.load(dbname) assert "not a taxonomy database" in str(exc) def test_sqlite_lca_db_supply_lineage_db(): # try creating an LCA_SqliteDatabase object with a separate lineage DB. 
- dbname = utils.get_test_data('sqlite/index.sqldb') + dbname = utils.get_test_data("sqlite/index.sqldb") - tax_csv = utils.get_test_data('sqlite/shewanella-lineage.csv') + tax_csv = utils.get_test_data("sqlite/shewanella-lineage.csv") lineage_db = MultiLineageDB.load([tax_csv]) db = LCA_SqliteDatabase(dbname, lineage_db=lineage_db) @@ -893,21 +899,21 @@ def test_sqlite_lca_db_supply_lineage_db(): hashval = next(iter(db.hashvals)) lineages = db.get_lineage_assignments(hashval) print(lineages) - assert lineages[0][0].rank == 'superkingdom' - assert lineages[0][0].name == 'd__Bacteria' - assert lineages[0][-1].rank == 'species' - assert lineages[0][-1].name == 's__Shewanella baltica' - assert lineages[1][0].rank == 'superkingdom' - assert lineages[1][0].name == 'd__Bacteria' - assert lineages[0][-1].rank == 'species' - assert lineages[0][-1].name == 's__Shewanella baltica' + assert lineages[0][0].rank == "superkingdom" + assert lineages[0][0].name == "d__Bacteria" + assert lineages[0][-1].rank == "species" + assert lineages[0][-1].name == "s__Shewanella baltica" + assert lineages[1][0].rank == "superkingdom" + assert lineages[1][0].name == "d__Bacteria" + assert lineages[0][-1].rank == "species" + assert lineages[0][-1].name == "s__Shewanella baltica" def test_bad_sqlite_internal_version(): # check get_sourmash_internal - dbname = utils.get_test_data('sqlite/index.sqldb') + dbname = utils.get_test_data("sqlite/index.sqldb") conn = sqlite_utils.open_sqlite_db(dbname) c = conn.cursor() with pytest.raises(Exception): - sqlite_utils.add_sourmash_internal(c, 'SqliteIndex', '0.9') + sqlite_utils.add_sourmash_internal(c, "SqliteIndex", "0.9") diff --git a/tests/test_tax.py b/tests/test_tax.py index b37e8eaf6f..3f766f5e37 100644 --- a/tests/test_tax.py +++ b/tests/test_tax.py @@ -18,99 +18,249 @@ from sourmash.exceptions import IndexNotSupported from sourmash import sourmash_args + ## command line tests def test_run_sourmash_tax(): - status, out, err = utils.runscript('sourmash', ['tax'], fail_ok=True) - assert status != 0 # no args provided, ok ;) + status, out, err = utils.runscript("sourmash", ["tax"], fail_ok=True) + assert status != 0 # no args provided, ok ;) def test_metagenome_stdout_0(runtmp): # test basic metagenome c = runtmp - g_csv = utils.get_test_data('tax/test1.gather.csv') - tax = utils.get_test_data('tax/test.taxonomy.csv') + g_csv = utils.get_test_data("tax/test1.gather.csv") + tax = utils.get_test_data("tax/test.taxonomy.csv") - c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax) + c.run_sourmash("tax", "metagenome", "-g", g_csv, "--taxonomy-csv", tax) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out - assert 'test1,superkingdom,0.204,d__Bacteria,md5,test1.sig,0.131,1024000' in c.last_result.out - assert 'test1,superkingdom,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out - assert 'test1,phylum,0.116,d__Bacteria;p__Bacteroidota,md5,test1.sig,0.073,582000' in c.last_result.out - assert 'test1,phylum,0.088,d__Bacteria;p__Proteobacteria,md5,test1.sig,0.058,442000' in c.last_result.out - assert 'test1,phylum,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out - assert 'test1,class,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia,md5,test1.sig,0.073,582000' in c.last_result.out - assert 
-    assert 'test1,class,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
-    assert 'test1,order,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales,md5,test1.sig,0.073,582000' in c.last_result.out
-    assert 'test1,order,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales,md5,test1.sig,0.058,442000' in c.last_result.out
-    assert 'test1,order,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
-    assert 'test1,family,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.073,582000' in c.last_result.out
-    assert 'test1,family,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae,md5,test1.sig,0.058,442000' in c.last_result.out
-    assert 'test1,family,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
-    assert 'test1,genus,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella,md5,test1.sig,0.057,444000' in c.last_result.out
-    assert 'test1,genus,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia,md5,test1.sig,0.058,442000' in c.last_result.out
-    assert 'test1,genus,0.028,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola,md5,test1.sig,0.016,138000' in c.last_result.out
-    assert 'test1,genus,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
-    assert 'test1,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out
-    assert 'test1,species,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli,md5,test1.sig,0.058,442000' in c.last_result.out
-    assert 'test1,species,0.028,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus,md5,test1.sig,0.016,138000' in c.last_result.out
-    assert 'test1,species,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
+    assert (
+        "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,superkingdom,0.204,d__Bacteria,md5,test1.sig,0.131,1024000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,superkingdom,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,phylum,0.116,d__Bacteria;p__Bacteroidota,md5,test1.sig,0.073,582000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,phylum,0.088,d__Bacteria;p__Proteobacteria,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,phylum,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,class,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia,md5,test1.sig,0.073,582000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,class,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,class,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,order,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales,md5,test1.sig,0.073,582000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,order,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,order,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,family,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.073,582000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,family,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,family,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,genus,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,genus,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,genus,0.028,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola,md5,test1.sig,0.016,138000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,genus,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,species,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,species,0.028,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus,md5,test1.sig,0.016,138000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,species,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
 
 
 def test_metagenome_stdout_0_db(runtmp):
     # test basic metagenome with sqlite database
     c = runtmp
 
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.db')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.db")
 
-    c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax)
+    c.run_sourmash("tax", "metagenome", "-g", g_csv, "--taxonomy-csv", tax)
 
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)
 
     assert c.last_result.status == 0
-    assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,superkingdom,0.204,d__Bacteria,md5,test1.sig,0.131,1024000' in c.last_result.out
-    assert 'test1,superkingdom,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
-    assert 'test1,phylum,0.116,d__Bacteria;p__Bacteroidota,md5,test1.sig,0.073,582000' in c.last_result.out
-    assert 'test1,phylum,0.088,d__Bacteria;p__Proteobacteria,md5,test1.sig,0.058,442000' in c.last_result.out
-    assert 'test1,phylum,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
-    assert 'test1,class,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia,md5,test1.sig,0.073,582000' in c.last_result.out
-    assert 'test1,class,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria,md5,test1.sig,0.058,442000' in c.last_result.out
-    assert 'test1,class,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
-    assert 'test1,order,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales,md5,test1.sig,0.073,582000' in c.last_result.out
-    assert 'test1,order,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales,md5,test1.sig,0.058,442000' in c.last_result.out
-    assert 'test1,order,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
-    assert 'test1,family,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.073,582000' in c.last_result.out
-    assert 'test1,family,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae,md5,test1.sig,0.058,442000' in c.last_result.out
-    assert 'test1,family,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
-    assert 'test1,genus,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella,md5,test1.sig,0.057,444000' in c.last_result.out
-    assert 'test1,genus,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia,md5,test1.sig,0.058,442000' in c.last_result.out
-    assert 'test1,genus,0.028,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola,md5,test1.sig,0.016,138000' in c.last_result.out
-    assert 'test1,genus,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
-    assert 'test1,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out
-    assert 'test1,species,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli,md5,test1.sig,0.058,442000' in c.last_result.out
-    assert 'test1,species,0.028,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus,md5,test1.sig,0.016,138000' in c.last_result.out
-    assert 'test1,species,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
+    assert (
+        "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,superkingdom,0.204,d__Bacteria,md5,test1.sig,0.131,1024000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,superkingdom,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,phylum,0.116,d__Bacteria;p__Bacteroidota,md5,test1.sig,0.073,582000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,phylum,0.088,d__Bacteria;p__Proteobacteria,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,phylum,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,class,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia,md5,test1.sig,0.073,582000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,class,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,class,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,order,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales,md5,test1.sig,0.073,582000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,order,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,order,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,family,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.073,582000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,family,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,family,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,genus,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,genus,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,genus,0.028,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola,md5,test1.sig,0.016,138000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,genus,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,species,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,species,0.028,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus,md5,test1.sig,0.016,138000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,species,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
 
 
 def test_metagenome_summary_csv_out(runtmp):
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
 
     csv_base = "out"
     sum_csv = csv_base + ".summarized.csv"
     csvout = runtmp.output(sum_csv)
     outdir = os.path.dirname(csvout)
 
-    runtmp.run_sourmash('tax', 'metagenome', '--gather-csv', g_csv, '--taxonomy-csv', tax, '-o', csv_base, '--output-dir', outdir)
+    runtmp.run_sourmash(
+        "tax",
+        "metagenome",
+        "--gather-csv",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "-o",
+        csv_base,
+        "--output-dir",
+        outdir,
+    )
 
     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
@@ -121,62 +271,164 @@ def test_metagenome_summary_csv_out(runtmp):
     sum_gather_results = [x.rstrip() for x in Path(csvout).read_text().splitlines()]
     assert f"saving 'csv_summary' output to '{csvout}'" in runtmp.last_result.err
-    assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in sum_gather_results[0]
-    assert 'test1,superkingdom,0.2042281611487834,d__Bacteria,md5,test1.sig,0.13080306238801107,1024000' in sum_gather_results[1]
-    assert 'test1,superkingdom,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000' in sum_gather_results[2]
-    assert 'test1,phylum,0.11607499002792182,d__Bacteria;p__Bacteroidota,md5,test1.sig,0.07265026877341586,582000' in sum_gather_results[3]
-    assert 'test1,phylum,0.08815317112086159,d__Bacteria;p__Proteobacteria,md5,test1.sig,0.05815279361459521,442000' in sum_gather_results[4]
-    assert 'test1,phylum,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000' in sum_gather_results[5]
-    assert 'test1,class,0.11607499002792182,d__Bacteria;p__Bacteroidota;c__Bacteroidia,md5,test1.sig,0.07265026877341586,582000' in sum_gather_results[6]
-    assert 'test1,class,0.08815317112086159,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria,md5,test1.sig,0.05815279361459521,442000' in sum_gather_results[7]
-    assert 'test1,class,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000' in sum_gather_results[8]
-    assert 'test1,order,0.11607499002792182,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales,md5,test1.sig,0.07265026877341586,582000' in sum_gather_results[9]
-    assert 'test1,order,0.08815317112086159,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales,md5,test1.sig,0.05815279361459521,442000' in sum_gather_results[10]
-    assert 'test1,order,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000' in sum_gather_results[11]
-    assert 'test1,family,0.11607499002792182,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.07265026877341586,582000' in sum_gather_results[12]
-    assert 'test1,family,0.08815317112086159,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae,md5,test1.sig,0.05815279361459521,442000' in sum_gather_results[13]
-    assert 'test1,family,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000' in sum_gather_results[14]
-    assert 'test1,genus,0.0885520542481053,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella,md5,test1.sig,0.05701254275940707,444000' in sum_gather_results[15]
-    assert 'test1,genus,0.08815317112086159,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia,md5,test1.sig,0.05815279361459521,442000' in sum_gather_results[16]
-    assert 'test1,genus,0.027522935779816515,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola,md5,test1.sig,0.015637726014008795,138000' in sum_gather_results[17]
-    assert 'test1,genus,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000' in sum_gather_results[18]
-    assert 'test1,species,0.0885520542481053,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.05701254275940707,444000' in sum_gather_results[19]
-    assert 'test1,species,0.08815317112086159,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli,md5,test1.sig,0.05815279361459521,442000' in sum_gather_results[20]
-    assert 'test1,species,0.027522935779816515,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus,md5,test1.sig,0.015637726014008795,138000' in sum_gather_results[21]
-    assert 'test1,species,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000' in sum_gather_results[22]
+    assert (
+        "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in sum_gather_results[0]
+    )
+    assert (
+        "test1,superkingdom,0.2042281611487834,d__Bacteria,md5,test1.sig,0.13080306238801107,1024000"
+        in sum_gather_results[1]
+    )
+    assert (
+        "test1,superkingdom,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000"
+        in sum_gather_results[2]
+    )
+    assert (
+        "test1,phylum,0.11607499002792182,d__Bacteria;p__Bacteroidota,md5,test1.sig,0.07265026877341586,582000"
+        in sum_gather_results[3]
+    )
+    assert (
+        "test1,phylum,0.08815317112086159,d__Bacteria;p__Proteobacteria,md5,test1.sig,0.05815279361459521,442000"
+        in sum_gather_results[4]
+    )
+    assert (
+        "test1,phylum,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000"
+        in sum_gather_results[5]
+    )
+    assert (
+        "test1,class,0.11607499002792182,d__Bacteria;p__Bacteroidota;c__Bacteroidia,md5,test1.sig,0.07265026877341586,582000"
+        in sum_gather_results[6]
+    )
+    assert (
+        "test1,class,0.08815317112086159,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria,md5,test1.sig,0.05815279361459521,442000"
+        in sum_gather_results[7]
+    )
+    assert (
+        "test1,class,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000"
+        in sum_gather_results[8]
+    )
+    assert (
+        "test1,order,0.11607499002792182,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales,md5,test1.sig,0.07265026877341586,582000"
+        in sum_gather_results[9]
+    )
+    assert (
+        "test1,order,0.08815317112086159,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales,md5,test1.sig,0.05815279361459521,442000"
+        in sum_gather_results[10]
+    )
+    assert (
+        "test1,order,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000"
+        in sum_gather_results[11]
+    )
+    assert (
+        "test1,family,0.11607499002792182,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.07265026877341586,582000"
+        in sum_gather_results[12]
+    )
+    assert (
+        "test1,family,0.08815317112086159,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae,md5,test1.sig,0.05815279361459521,442000"
+        in sum_gather_results[13]
+    )
+    assert (
+        "test1,family,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000"
+        in sum_gather_results[14]
+    )
+    assert (
+        "test1,genus,0.0885520542481053,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella,md5,test1.sig,0.05701254275940707,444000"
+        in sum_gather_results[15]
+    )
+    assert (
+        "test1,genus,0.08815317112086159,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia,md5,test1.sig,0.05815279361459521,442000"
+        in sum_gather_results[16]
+    )
+    assert (
+        "test1,genus,0.027522935779816515,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola,md5,test1.sig,0.015637726014008795,138000"
+        in sum_gather_results[17]
+    )
+    assert (
+        "test1,genus,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000"
+        in sum_gather_results[18]
+    )
+    assert (
+        "test1,species,0.0885520542481053,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.05701254275940707,444000"
+        in sum_gather_results[19]
+    )
+    assert (
+        "test1,species,0.08815317112086159,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli,md5,test1.sig,0.05815279361459521,442000"
+        in sum_gather_results[20]
+    )
+    assert (
+        "test1,species,0.027522935779816515,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus,md5,test1.sig,0.015637726014008795,138000"
+        in sum_gather_results[21]
+    )
+    assert (
+        "test1,species,0.7957718388512166,unclassified,md5,test1.sig,0.8691969376119889,3990000"
+        in sum_gather_results[22]
+    )
 
 
 def test_metagenome_summary_csv_out_empty_gather_force(runtmp):
     # test multiple -g, empty -g file, and --force
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
 
     csv_base = "out"
     sum_csv = csv_base + ".summarized.csv"
     csvout = runtmp.output(sum_csv)
     outdir = os.path.dirname(csvout)
 
-    gather_empty = runtmp.output('g.csv')
+    gather_empty = runtmp.output("g.csv")
     with open(gather_empty, "w") as fp:
         fp.write("")
     print("g_csv: ", gather_empty)
 
-    runtmp.run_sourmash('tax', 'metagenome', '--gather-csv', g_csv, '-g', gather_empty, '--taxonomy-csv', tax, '-o', csv_base, '--output-dir', outdir, '-f')
+    runtmp.run_sourmash(
+        "tax",
+        "metagenome",
+        "--gather-csv",
+        g_csv,
+        "-g",
+        gather_empty,
+        "--taxonomy-csv",
+        tax,
+        "-o",
+        csv_base,
+        "--output-dir",
+        outdir,
+        "-f",
+    )
 
     sum_gather_results = [x.rstrip() for x in Path(csvout).read_text().splitlines()]
     assert f"saving 'csv_summary' output to '{csvout}'" in runtmp.last_result.err
-    assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in sum_gather_results[0]
-    assert 'test1,superkingdom,0.2042281611487834,d__Bacteria,md5,test1.sig,0.13080306238801107,1024000' in sum_gather_results[1]
+    assert (
+        "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in sum_gather_results[0]
+    )
+    assert (
+        "test1,superkingdom,0.2042281611487834,d__Bacteria,md5,test1.sig,0.13080306238801107,1024000"
+        in sum_gather_results[1]
+    )
 
 
 def test_metagenome_kreport_out(runtmp):
     # test 'kreport' kraken output format
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
 
     csv_base = "out"
     sum_csv = csv_base + ".kreport.txt"
     csvout = runtmp.output(sum_csv)
     outdir = os.path.dirname(csvout)
 
-    runtmp.run_sourmash('tax', 'metagenome', '--gather-csv', g_csv, '--taxonomy-csv', tax, '-o', csv_base, '--output-dir', outdir, '-F', "kreport")
+    runtmp.run_sourmash(
+        "tax",
+        "metagenome",
+        "--gather-csv",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "-o",
+        csv_base,
+        "--output-dir",
+        outdir,
+        "-F",
+        "kreport",
+    )
 
     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
@@ -185,37 +437,89 @@ def test_metagenome_kreport_out(runtmp):
     assert runtmp.last_result.status == 0
     assert os.path.exists(csvout)
 
-    kreport_results = [x.rstrip().split('\t') for x in Path(csvout).read_text().splitlines()]
+    kreport_results = [
+        x.rstrip().split("\t") for x in Path(csvout).read_text().splitlines()
+    ]
 
     assert f"saving 'kreport' output to '{csvout}'" in runtmp.last_result.err
     print(kreport_results)
-    assert ['13.08', '1605999', '0', 'D', '', 'd__Bacteria'] == kreport_results[0]
-    assert ['86.92', '10672000', '10672000', 'U', '', 'unclassified'] == kreport_results[1]
-    assert ['7.27', '892000', '0', 'P', '', 'p__Bacteroidota'] == kreport_results[2]
-    assert ['5.82', '714000', '0', 'P', '', 'p__Proteobacteria'] == kreport_results[3]
-    assert ['7.27', '892000', '0', 'C', '', 'c__Bacteroidia'] == kreport_results[4]
-    assert ['5.82', '714000', '0', 'C', '', 'c__Gammaproteobacteria'] == kreport_results[5]
-    assert ['7.27', '892000', '0', 'O', '', 'o__Bacteroidales'] == kreport_results[6]
-    assert ['5.82', '714000', '0', 'O', '', 'o__Enterobacterales'] == kreport_results[7]
-    assert ['7.27', '892000', '0', 'F', '', 'f__Bacteroidaceae'] == kreport_results[8]
-    assert ['5.82', '714000', '0', 'F', '', 'f__Enterobacteriaceae'] == kreport_results[9]
-    assert ['5.70', '700000', '0', 'G', '', 'g__Prevotella'] == kreport_results[10]
-    assert ['5.82', '714000', '0', 'G', '', 'g__Escherichia'] == kreport_results[11]
-    assert ['1.56', '192000', '0', 'G', '', 'g__Phocaeicola'] == kreport_results[12]
-    assert ['5.70', '700000', '700000', 'S', '', 's__Prevotella copri'] == kreport_results[13]
-    assert ['5.82', '714000', '714000', 'S', '', 's__Escherichia coli']== kreport_results[14]
-    assert ['1.56', '192000', '192000', 'S', '', 's__Phocaeicola vulgatus'] == kreport_results[15]
+    assert ["13.08", "1605999", "0", "D", "", "d__Bacteria"] == kreport_results[0]
+    assert [
+        "86.92",
+        "10672000",
+        "10672000",
+        "U",
+        "",
+        "unclassified",
+    ] == kreport_results[1]
+    assert ["7.27", "892000", "0", "P", "", "p__Bacteroidota"] == kreport_results[2]
+    assert ["5.82", "714000", "0", "P", "", "p__Proteobacteria"] == kreport_results[3]
+    assert ["7.27", "892000", "0", "C", "", "c__Bacteroidia"] == kreport_results[4]
+    assert [
+        "5.82",
+        "714000",
+        "0",
+        "C",
+        "",
+        "c__Gammaproteobacteria",
+    ] == kreport_results[5]
+    assert ["7.27", "892000", "0", "O", "", "o__Bacteroidales"] == kreport_results[6]
+    assert ["5.82", "714000", "0", "O", "", "o__Enterobacterales"] == kreport_results[7]
+    assert ["7.27", "892000", "0", "F", "", "f__Bacteroidaceae"] == kreport_results[8]
+    assert ["5.82", "714000", "0", "F", "", "f__Enterobacteriaceae"] == kreport_results[
+        9
+    ]
+    assert ["5.70", "700000", "0", "G", "", "g__Prevotella"] == kreport_results[10]
+    assert ["5.82", "714000", "0", "G", "", "g__Escherichia"] == kreport_results[11]
+    assert ["1.56", "192000", "0", "G", "", "g__Phocaeicola"] == kreport_results[12]
+    assert [
+        "5.70",
+        "700000",
+        "700000",
+        "S",
+        "",
+        "s__Prevotella copri",
+    ] == kreport_results[13]
+    assert [
+        "5.82",
+        "714000",
+        "714000",
+        "S",
+        "",
+        "s__Escherichia coli",
+    ] == kreport_results[14]
+    assert [
+        "1.56",
+        "192000",
+        "192000",
+        "S",
+        "",
+        "s__Phocaeicola vulgatus",
+    ] == kreport_results[15]
 
 
 def test_metagenome_kreport_ncbi_taxid_out(runtmp):
     # test NCBI taxid output from kreport
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.ncbi-taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.ncbi-taxonomy.csv")
 
     csv_base = "out"
     sum_csv = csv_base + ".kreport.txt"
     csvout = runtmp.output(sum_csv)
     outdir = os.path.dirname(csvout)
 
-    runtmp.run_sourmash('tax', 'metagenome', '--gather-csv', g_csv, '--taxonomy-csv', tax, '-o', csv_base, '--output-dir', outdir, '-F', "kreport")
+    runtmp.run_sourmash(
+        "tax",
+        "metagenome",
+        "--gather-csv",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "-o",
+        csv_base,
+        "--output-dir",
+        outdir,
+        "-F",
+        "kreport",
+    )
 
     print(runtmp.last_result.status)
    print(runtmp.last_result.out)
@@ -224,38 +528,94 @@ def test_metagenome_kreport_ncbi_taxid_out(runtmp):
     assert runtmp.last_result.status == 0
     assert os.path.exists(csvout)
 
-    kreport_results = [x.rstrip().split('\t') for x in Path(csvout).read_text().splitlines()]
+    kreport_results = [
+        x.rstrip().split("\t") for x in Path(csvout).read_text().splitlines()
+    ]
 
     assert f"saving 'kreport' output to '{csvout}'" in runtmp.last_result.err
     print(kreport_results)
-    assert ['13.08', '1605999', '0', 'D', '2', 'Bacteria'] == kreport_results[0]
-    assert ['86.92', '10672000', '10672000', 'U', '', 'unclassified'] == kreport_results[1]
-    assert ['7.27', '892000', '0', 'P', '976', 'Bacteroidota'] == kreport_results[2]
-    assert ['5.82', '714000', '0', 'P', '1224', 'Pseudomonadota'] == kreport_results[3]
-    assert ['7.27', '892000', '0', 'C', '200643', 'Bacteroidia'] == kreport_results[4]
-    assert ['5.82', '714000', '0', 'C', '1236', 'Gammaproteobacteria'] == kreport_results[5]
-    assert ['7.27', '892000', '0', 'O', '171549', 'Bacteroidales'] == kreport_results[6]
-    assert ['5.82', '714000', '0', 'O', '91347', 'Enterobacterales'] == kreport_results[7]
-    assert ['5.70', '700000', '0', 'F', '171552', 'Prevotellaceae'] == kreport_results[8]
-    assert ['5.82', '714000', '0', 'F', '543', 'Enterobacteriaceae'] == kreport_results[9]
-    assert ['1.56', '192000', '0', 'F', '815', 'Bacteroidaceae'] == kreport_results[10]
-    assert ['5.70', '700000', '0', 'G', '838', 'Prevotella'] == kreport_results[11]
-    assert ['5.82', '714000', '0', 'G', '561', 'Escherichia'] == kreport_results[12]
-    assert ['1.56', '192000', '0', 'G', '909656', 'Phocaeicola'] == kreport_results[13]
-    assert ['5.70', '700000', '700000', 'S', '165179', 'Prevotella copri'] == kreport_results[14]
-    assert ['5.82', '714000', '714000', 'S', '562', 'Escherichia coli'] == kreport_results[15]
-    assert ['1.56', '192000', '192000', 'S', '821', 'Phocaeicola vulgatus'] == kreport_results[16]
+    assert ["13.08", "1605999", "0", "D", "2", "Bacteria"] == kreport_results[0]
+    assert [
+        "86.92",
+        "10672000",
+        "10672000",
+        "U",
+        "",
+        "unclassified",
+    ] == kreport_results[1]
+    assert ["7.27", "892000", "0", "P", "976", "Bacteroidota"] == kreport_results[2]
+    assert ["5.82", "714000", "0", "P", "1224", "Pseudomonadota"] == kreport_results[3]
+    assert ["7.27", "892000", "0", "C", "200643", "Bacteroidia"] == kreport_results[4]
+    assert [
+        "5.82",
+        "714000",
+        "0",
+        "C",
+        "1236",
+        "Gammaproteobacteria",
+    ] == kreport_results[5]
+    assert ["7.27", "892000", "0", "O", "171549", "Bacteroidales"] == kreport_results[6]
+    assert ["5.82", "714000", "0", "O", "91347", "Enterobacterales"] == kreport_results[
+        7
+    ]
+    assert ["5.70", "700000", "0", "F", "171552", "Prevotellaceae"] == kreport_results[
+        8
+    ]
+    assert ["5.82", "714000", "0", "F", "543", "Enterobacteriaceae"] == kreport_results[
+        9
+    ]
+    assert ["1.56", "192000", "0", "F", "815", "Bacteroidaceae"] == kreport_results[10]
+    assert ["5.70", "700000", "0", "G", "838", "Prevotella"] == kreport_results[11]
+    assert ["5.82", "714000", "0", "G", "561", "Escherichia"] == kreport_results[12]
+    assert ["1.56", "192000", "0", "G", "909656", "Phocaeicola"] == kreport_results[13]
+    assert [
+        "5.70",
+        "700000",
+        "700000",
+        "S",
+        "165179",
+        "Prevotella copri",
+    ] == kreport_results[14]
+    assert [
+        "5.82",
+        "714000",
+        "714000",
+        "S",
+        "562",
+        "Escherichia coli",
+    ] == kreport_results[15]
+    assert [
+        "1.56",
+        "192000",
+        "192000",
+        "S",
+        "821",
+        "Phocaeicola vulgatus",
+    ] == kreport_results[16]
 
 
 def test_metagenome_kreport_out_lemonade(runtmp):
     # test 'kreport' kraken output format against lemonade output
-    g_csv = utils.get_test_data('tax/lemonade-MAG3.x.gtdb.csv')
-    tax = utils.get_test_data('tax/lemonade-MAG3.x.gtdb.matches.tax.csv')
+    g_csv = utils.get_test_data("tax/lemonade-MAG3.x.gtdb.csv")
+    tax = utils.get_test_data("tax/lemonade-MAG3.x.gtdb.matches.tax.csv")
 
     csv_base = "out"
     sum_csv = csv_base + ".kreport.txt"
     csvout = runtmp.output(sum_csv)
     outdir = os.path.dirname(csvout)
 
-    runtmp.run_sourmash('tax', 'metagenome', '--gather-csv', g_csv, '--taxonomy-csv', tax, '-o', csv_base, '--output-dir', outdir, '-F', "kreport")
+    runtmp.run_sourmash(
+        "tax",
+        "metagenome",
+        "--gather-csv",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "-o",
+        csv_base,
+        "--output-dir",
+        outdir,
+        "-F",
+        "kreport",
+    )
 
     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
@@ -264,44 +624,80 @@ def test_metagenome_kreport_out_lemonade(runtmp):
     assert runtmp.last_result.status == 0
     assert os.path.exists(csvout)
 
-    kreport_results = [x.rstrip().split('\t') for x in Path(csvout).read_text().splitlines()]
+    kreport_results = [
+        x.rstrip().split("\t") for x in Path(csvout).read_text().splitlines()
+    ]
 
     assert f"saving 'kreport' output to '{csvout}'" in runtmp.last_result.err
     print(kreport_results)
-    assert ['5.35', '116000', '0', 'D', '', 'd__Bacteria'] == kreport_results[0]
-    assert ['94.65', '2054000', '2054000', 'U', '', 'unclassified'] == kreport_results[1]
-    assert ['5.35', '116000', '0', 'P', '', 'p__Bacteroidota'] == kreport_results[2]
-    assert ['5.35', '116000', '0', 'C', '', 'c__Chlorobia'] == kreport_results[3]
-    assert ['5.35', '116000', '0', 'O', '', 'o__Chlorobiales'] == kreport_results[4]
-    assert ['5.35', '116000', '0', 'F', '', 'f__Chlorobiaceae'] == kreport_results[5]
-    assert ['5.35', '116000', '0', 'G', '', 'g__Prosthecochloris'] == kreport_results[6]
-    assert ['5.35', '116000', '116000', 'S', '', 's__Prosthecochloris vibrioformis'] == kreport_results[7]
+    assert ["5.35", "116000", "0", "D", "", "d__Bacteria"] == kreport_results[0]
+    assert ["94.65", "2054000", "2054000", "U", "", "unclassified"] == kreport_results[
+        1
+    ]
+    assert ["5.35", "116000", "0", "P", "", "p__Bacteroidota"] == kreport_results[2]
+    assert ["5.35", "116000", "0", "C", "", "c__Chlorobia"] == kreport_results[3]
+    assert ["5.35", "116000", "0", "O", "", "o__Chlorobiales"] == kreport_results[4]
+    assert ["5.35", "116000", "0", "F", "", "f__Chlorobiaceae"] == kreport_results[5]
+    assert ["5.35", "116000", "0", "G", "", "g__Prosthecochloris"] == kreport_results[6]
+    assert [
+        "5.35",
+        "116000",
+        "116000",
+        "S",
+        "",
+        "s__Prosthecochloris vibrioformis",
+    ] == kreport_results[7]
 
 
 def test_metagenome_kreport_out_fail(runtmp):
     # kreport cannot be generated with gather results from < v4.5.0
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
 
     csv_base = "out"
     sum_csv = csv_base + ".kreport.txt"
     csvout = runtmp.output(sum_csv)
     outdir = os.path.dirname(csvout)
 
     with pytest.raises(SourmashCommandFailed):
-        runtmp.run_sourmash('tax', 'metagenome', '--gather-csv', g_csv, '--taxonomy-csv', tax, '-o', csv_base, '--output-dir', outdir, '-F', "kreport")
+        runtmp.run_sourmash(
+            "tax",
+            "metagenome",
+            "--gather-csv",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "-o",
+            csv_base,
+            "--output-dir",
+            outdir,
+            "-F",
+            "kreport",
+        )
 
     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)
 
-    assert "ERROR: cannot produce 'kreport' format from gather results before sourmash v4.5.0" in runtmp.last_result.err
+    assert (
+        "ERROR: cannot produce 'kreport' format from gather results before sourmash v4.5.0"
+        in runtmp.last_result.err
+    )
 
 
 def test_metagenome_bioboxes_stdout(runtmp):
     # test CAMI bioboxes format output
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.ncbi-taxonomy.csv')
-
-    runtmp.run_sourmash('tax', 'metagenome', '--gather-csv', g_csv, '--taxonomy-csv', tax, '-F', "bioboxes")
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.ncbi-taxonomy.csv")
+
+    runtmp.run_sourmash(
+        "tax",
+        "metagenome",
+        "--gather-csv",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "-F",
+        "bioboxes",
+    )
 
     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
@@ -312,36 +708,97 @@ def test_metagenome_bioboxes_stdout(runtmp):
     assert "# Taxonomic Profiling Output" in runtmp.last_result.out
     assert "@SampleID:test1" in runtmp.last_result.out
     assert "@Version:0.10.0" in runtmp.last_result.out
-    assert "@Ranks:superkingdom|phylum|class|order|family|genus|species|strain" in runtmp.last_result.out
+    assert (
+        "@Ranks:superkingdom|phylum|class|order|family|genus|species|strain"
+        in runtmp.last_result.out
+    )
     assert "@__program__:sourmash" in runtmp.last_result.out
     assert "2 superkingdom 2 Bacteria 13.08" in runtmp.last_result.out
-    assert "976 phylum 2|976 Bacteria|Bacteroidota 7.27" in runtmp.last_result.out
-    assert "1224 phylum 2|1224 Bacteria|Pseudomonadota 5.82" in runtmp.last_result.out
-    assert "200643 class 2|976|200643 Bacteria|Bacteroidota|Bacteroidia 7.27" in runtmp.last_result.out
-    assert "1236 class 2|1224|1236 Bacteria|Pseudomonadota|Gammaproteobacteria 5.82" in runtmp.last_result.out
-    assert "171549 order 2|976|200643|171549 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales 7.27" in runtmp.last_result.out
-    assert "91347 order 2|1224|1236|91347 Bacteria|Pseudomonadota|Gammaproteobacteria|Enterobacterales 5.82" in runtmp.last_result.out
-    assert "171552 family 2|976|200643|171549|171552 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Prevotellaceae 5.70" in runtmp.last_result.out
-    assert "543 family 2|1224|1236|91347|543 Bacteria|Pseudomonadota|Gammaproteobacteria|Enterobacterales|Enterobacteriaceae 5.82" in runtmp.last_result.out
-    assert "815 family 2|976|200643|171549|815 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Bacteroidaceae 1.56" in runtmp.last_result.out
-    assert "838 genus 2|976|200643|171549|171552|838 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Prevotellaceae|Prevotella 5.70" in runtmp.last_result.out
-    assert "561 genus 2|1224|1236|91347|543|561 Bacteria|Pseudomonadota|Gammaproteobacteria|Enterobacterales|Enterobacteriaceae|Escherichia 5.82" in runtmp.last_result.out
-    assert "909656 genus 2|976|200643|171549|815|909656 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Bacteroidaceae|Phocaeicola 1.56" in runtmp.last_result.out
-    assert "165179 species 2|976|200643|171549|171552|838|165179 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Prevotellaceae|Prevotella|Prevotella copri 5.70" in runtmp.last_result.out
-    assert "562 species 2|1224|1236|91347|543|561|562 Bacteria|Pseudomonadota|Gammaproteobacteria|Enterobacterales|Enterobacteriaceae|Escherichia|Escherichia coli 5.82" in runtmp.last_result.out
-    assert "821 species 2|976|200643|171549|815|909656|821 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Bacteroidaceae|Phocaeicola|Phocaeicola vulgatus 1.56" in runtmp.last_result.out
+    assert (
+        "976 phylum 2|976 Bacteria|Bacteroidota 7.27"
+        in runtmp.last_result.out
+    )
+    assert (
+        "1224 phylum 2|1224 Bacteria|Pseudomonadota 5.82"
+        in runtmp.last_result.out
+    )
+    assert (
+        "200643 class 2|976|200643 Bacteria|Bacteroidota|Bacteroidia 7.27"
+        in runtmp.last_result.out
+    )
+    assert (
+        "1236 class 2|1224|1236 Bacteria|Pseudomonadota|Gammaproteobacteria 5.82"
+        in runtmp.last_result.out
+    )
+    assert (
+        "171549 order 2|976|200643|171549 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales 7.27"
+        in runtmp.last_result.out
+    )
+    assert (
+        "91347 order 2|1224|1236|91347 Bacteria|Pseudomonadota|Gammaproteobacteria|Enterobacterales 5.82"
+        in runtmp.last_result.out
+    )
+    assert (
+        "171552 family 2|976|200643|171549|171552 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Prevotellaceae 5.70"
+        in runtmp.last_result.out
+    )
+    assert (
+        "543 family 2|1224|1236|91347|543 Bacteria|Pseudomonadota|Gammaproteobacteria|Enterobacterales|Enterobacteriaceae 5.82"
+        in runtmp.last_result.out
+    )
+    assert (
+        "815 family 2|976|200643|171549|815 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Bacteroidaceae 1.56"
+        in runtmp.last_result.out
+    )
+    assert (
+        "838 genus 2|976|200643|171549|171552|838 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Prevotellaceae|Prevotella 5.70"
+        in runtmp.last_result.out
+    )
+    assert (
+        "561 genus 2|1224|1236|91347|543|561 Bacteria|Pseudomonadota|Gammaproteobacteria|Enterobacterales|Enterobacteriaceae|Escherichia 5.82"
+        in runtmp.last_result.out
+    )
+    assert (
+        "909656 genus 2|976|200643|171549|815|909656 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Bacteroidaceae|Phocaeicola 1.56"
+        in runtmp.last_result.out
+    )
+    assert (
+        "165179 species 2|976|200643|171549|171552|838|165179 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Prevotellaceae|Prevotella|Prevotella copri 5.70"
+        in runtmp.last_result.out
+    )
+    assert (
+        "562 species 2|1224|1236|91347|543|561|562 Bacteria|Pseudomonadota|Gammaproteobacteria|Enterobacterales|Enterobacteriaceae|Escherichia|Escherichia coli 5.82"
+        in runtmp.last_result.out
+    )
+    assert (
+        "821 species 2|976|200643|171549|815|909656|821 Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Bacteroidaceae|Phocaeicola|Phocaeicola vulgatus 1.56"
+        in runtmp.last_result.out
+    )
 
 
 def test_metagenome_bioboxes_outfile(runtmp):
     # test CAMI bioboxes format output
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.ncbi-taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.ncbi-taxonomy.csv")
 
     csv_base = "out"
     sum_csv = csv_base + ".bioboxes.profile"
     csvout = runtmp.output(sum_csv)
     outdir = os.path.dirname(csvout)
 
-    runtmp.run_sourmash('tax', 'metagenome', '--gather-csv', g_csv, '--taxonomy-csv', tax, '-F', "bioboxes", '-o', csv_base, '--output-dir', outdir,)
+    runtmp.run_sourmash(
+        "tax",
+        "metagenome",
+        "--gather-csv",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "-F",
+        "bioboxes",
+        "-o",
+        csv_base,
+        "--output-dir",
+        outdir,
+    )
 
     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
@@ -349,26 +806,46 @@ def test_metagenome_bioboxes_outfile(runtmp):
 
     assert runtmp.last_result.status == 0
 
-    bb_results = [x.rstrip().split('\t') for x in Path(csvout).read_text().splitlines()]
+    bb_results = [x.rstrip().split("\t") for x in Path(csvout).read_text().splitlines()]
 
     assert f"saving 'bioboxes' output to '{csvout}'" in runtmp.last_result.err
     print(bb_results)
-    assert ['# Taxonomic Profiling Output'] == bb_results[0]
-    assert ['@SampleID:test1'] == bb_results[1]
-    assert ['2', 'superkingdom', '2', 'Bacteria', '13.08'] == bb_results[6]
-    assert ['838', 'genus', '2|976|200643|171549|171552|838', 'Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Prevotellaceae|Prevotella', '5.70'] == bb_results[16]
+    assert ["# Taxonomic Profiling Output"] == bb_results[0]
+    assert ["@SampleID:test1"] == bb_results[1]
+    assert ["2", "superkingdom", "2", "Bacteria", "13.08"] == bb_results[6]
+    assert [
+        "838",
+        "genus",
+        "2|976|200643|171549|171552|838",
+        "Bacteria|Bacteroidota|Bacteroidia|Bacteroidales|Prevotellaceae|Prevotella",
+        "5.70",
+    ] == bb_results[16]
 
 
 def test_metagenome_krona_tsv_out(runtmp):
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
 
     csv_base = "out"
     kr_csv = csv_base + ".krona.tsv"
    csvout = runtmp.output(kr_csv)
     outdir = os.path.dirname(csvout)
     print("csvout: ", csvout)
 
-    runtmp.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax, '-o', csv_base,
-                        '--output-format', 'krona', '--rank', 'genus', '--output-dir', outdir)
+    runtmp.run_sourmash(
+        "tax",
+        "metagenome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "-o",
+        csv_base,
+        "--output-format",
+        "krona",
+        "--rank",
+        "genus",
+        "--output-dir",
+        outdir,
+    )
 
     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
@@ -378,27 +855,82 @@ def test_metagenome_krona_tsv_out(runtmp):
     assert os.path.exists(csvout)
     assert f"saving 'krona' output to '{csvout}'" in runtmp.last_result.err
 
-    gn_krona_results = [x.rstrip().split('\t') for x in Path(csvout).read_text().splitlines()]
+    gn_krona_results = [
+        x.rstrip().split("\t") for x in Path(csvout).read_text().splitlines()
+    ]
     print("species krona results: \n", gn_krona_results)
-    assert ['fraction', 'superkingdom', 'phylum', 'class', 'order', 'family', 'genus'] == gn_krona_results[0]
-    assert ['0.0885520542481053', 'd__Bacteria', 'p__Bacteroidota', 'c__Bacteroidia', 'o__Bacteroidales', 'f__Bacteroidaceae', 'g__Prevotella'] == gn_krona_results[1]
-    assert ['0.08815317112086159', 'd__Bacteria', 'p__Proteobacteria', 'c__Gammaproteobacteria', 'o__Enterobacterales', 'f__Enterobacteriaceae', 'g__Escherichia'] == gn_krona_results[2]
-    assert ['0.027522935779816515', 'd__Bacteria', 'p__Bacteroidota', 'c__Bacteroidia', 'o__Bacteroidales', 'f__Bacteroidaceae', 'g__Phocaeicola'] == gn_krona_results[3]
-    assert ['0.7957718388512166', 'unclassified', 'unclassified', 'unclassified', 'unclassified', 'unclassified', 'unclassified'] == gn_krona_results[4]
+    assert [
+        "fraction",
+        "superkingdom",
+        "phylum",
+        "class",
+        "order",
+        "family",
+        "genus",
+    ] == gn_krona_results[0]
+    assert [
+        "0.0885520542481053",
+        "d__Bacteria",
+        "p__Bacteroidota",
+        "c__Bacteroidia",
+        "o__Bacteroidales",
+        "f__Bacteroidaceae",
+        "g__Prevotella",
+    ] == gn_krona_results[1]
+    assert [
+        "0.08815317112086159",
+        "d__Bacteria",
+        "p__Proteobacteria",
+        "c__Gammaproteobacteria",
+        "o__Enterobacterales",
+        "f__Enterobacteriaceae",
+        "g__Escherichia",
+    ] == gn_krona_results[2]
+    assert [
+        "0.027522935779816515",
+        "d__Bacteria",
+        "p__Bacteroidota",
+        "c__Bacteroidia",
+        "o__Bacteroidales",
+        "f__Bacteroidaceae",
+        "g__Phocaeicola",
+    ] == gn_krona_results[3]
+    assert [
+        "0.7957718388512166",
+        "unclassified",
+        "unclassified",
+        "unclassified",
+        "unclassified",
+        "unclassified",
+        "unclassified",
+    ] == gn_krona_results[4]
 
 
 def test_metagenome_lineage_summary_out(runtmp):
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
 
     csv_base = "out"
     lin_csv = csv_base + ".lineage_summary.tsv"
     csvout = runtmp.output(lin_csv)
     outdir = os.path.dirname(csvout)
     print("csvout: ", csvout)
 
-    runtmp.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax,
-                        '-o', csv_base, '--output-format', 'lineage_summary', '--rank',
-                        'genus', '--output-dir', outdir)
+    runtmp.run_sourmash(
+        "tax",
+        "metagenome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "-o",
+        csv_base,
+        "--output-format",
+        "lineage_summary",
+        "--rank",
+        "genus",
+        "--output-dir",
+        outdir,
+    )
 
     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
@@ -408,26 +940,50 @@ def test_metagenome_lineage_summary_out(runtmp):
     assert os.path.exists(csvout)
     assert f"saving 'lineage_summary' output to '{csvout}'" in runtmp.last_result.err
 
-    gn_lineage_summary = [x.rstrip().split('\t') for x in Path(csvout).read_text().splitlines()]
+    gn_lineage_summary = [
+        x.rstrip().split("\t") for x in Path(csvout).read_text().splitlines()
+    ]
     print("species lineage summary results: \n", gn_lineage_summary)
-    assert ['lineage', 'test1'] == gn_lineage_summary[0]
-    assert ['d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola', '0.027522935779816515'] == gn_lineage_summary[1]
-    assert ['d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella', '0.0885520542481053'] == gn_lineage_summary[2]
-    assert ['d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia', '0.08815317112086159'] == gn_lineage_summary[3]
-    assert ['unclassified', '0.7957718388512166'] == gn_lineage_summary[4]
+    assert ["lineage", "test1"] == gn_lineage_summary[0]
+    assert [
+        "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola",
+        "0.027522935779816515",
+    ] == gn_lineage_summary[1]
+    assert [
+        "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella",
+        "0.0885520542481053",
+    ] == gn_lineage_summary[2]
+    assert [
+        "d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia",
+        "0.08815317112086159",
+    ] == gn_lineage_summary[3]
+    assert ["unclassified", "0.7957718388512166"] == gn_lineage_summary[4]
 
 
 def test_metagenome_human_format_out(runtmp):
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
 
     csv_base = "out"
-    csvout = runtmp.output(csv_base + '.human.txt')
+    csvout = runtmp.output(csv_base + ".human.txt")
     outdir = os.path.dirname(csvout)
     print("csvout: ", csvout)
 
-    runtmp.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax,
-                        '-o', csv_base, '--output-format', 'human', '--rank',
-                        'genus', '--output-dir', outdir)
+    runtmp.run_sourmash(
+        "tax",
+        "metagenome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "-o",
+        csv_base,
+        "--output-format",
+        "human",
+        "--rank",
+        "genus",
+        "--output-dir",
+        outdir,
+    )
 
     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
@@ -441,104 +997,192 @@ def test_metagenome_human_format_out(runtmp):
         outp = fp.readlines()
 
     assert len(outp) == 6
-    outp = [ x.strip() for x in outp ]
+    outp = [x.strip() for x in outp]
     print(outp)
 
-    assert outp[0] == 'sample name proportion cANI lineage'
-    assert outp[1] == '----------- ---------- ---- -------'
-    assert outp[2] == 'test1 86.9% - unclassified'
-    assert outp[3] == 'test1 5.8% 92.5% d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia'
-    assert outp[4] == 'test1 5.7% 92.5% d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella'
-    assert outp[5] == 'test1 1.6% 89.1% d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola'
+    assert outp[0] == "sample name proportion cANI lineage"
+    assert outp[1] == "----------- ---------- ---- -------"
+    assert outp[2] == "test1 86.9% - unclassified"
+    assert (
+        outp[3]
+        == "test1 5.8% 92.5% d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia"
+    )
+    assert (
+        outp[4]
+        == "test1 5.7% 92.5% d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella"
+    )
+    assert (
+        outp[5]
+        == "test1 1.6% 89.1% d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola"
+    )
 
 
 def test_metagenome_no_taxonomy_fail(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
 
     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'metagenome', '-g', g_csv)
-    assert "error: the following arguments are required: -t/--taxonomy-csv" in str(exc.value)
+        c.run_sourmash("tax", "metagenome", "-g", g_csv)
+    assert "error: the following arguments are required: -t/--taxonomy-csv" in str(
+        exc.value
+    )
 
 
 def test_metagenome_no_rank_lineage_summary(runtmp):
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
     csv_base = "out"
 
     with pytest.raises(SourmashCommandFailed) as exc:
-        runtmp.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax, '-o', csv_base, '--output-format', 'lineage_summary')
+        runtmp.run_sourmash(
+            "tax",
+            "metagenome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "-o",
+            csv_base,
+            "--output-format",
+            "lineage_summary",
+        )
 
     print(str(exc.value))
-    assert "Rank (--rank) is required for krona, lineage_summary output formats." in str(exc.value)
+    assert (
+        "Rank (--rank) is required for krona, lineage_summary output formats."
+        in str(exc.value)
+    )
 
 
 def test_metagenome_no_rank_krona(runtmp):
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
    csv_base = "out"
 
     with pytest.raises(SourmashCommandFailed) as exc:
-        runtmp.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax, '-o', csv_base, '--output-format', 'krona')
+        runtmp.run_sourmash(
+            "tax",
+            "metagenome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "-o",
+            csv_base,
+            "--output-format",
+            "krona",
+        )
 
     print(str(exc.value))
-    assert "Rank (--rank) is required for krona, lineage_summary output formats." in str(exc.value)
+    assert (
+        "Rank (--rank) is required for krona, lineage_summary output formats."
+        in str(exc.value)
+    )
 
 
 def test_metagenome_bad_rank_krona(runtmp):
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
     csv_base = "out"
 
     with pytest.raises(SourmashCommandFailed) as exc:
-        runtmp.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax, '-o', csv_base, '--output-format', 'krona', '--rank', 'NotARank')
+        runtmp.run_sourmash(
+            "tax",
+            "metagenome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "-o",
+            csv_base,
+            "--output-format",
+            "krona",
+            "--rank",
+            "NotARank",
+        )
 
     print(str(exc.value))
-    assert "Invalid '--rank'/'--position' input: 'NotARank'. Please choose: 'strain', 'species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom'" in runtmp.last_result.err
+    assert (
+        "Invalid '--rank'/'--position' input: 'NotARank'. Please choose: 'strain', 'species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom'"
+        in runtmp.last_result.err
+    )
 
     with pytest.raises(SourmashCommandFailed) as exc:
-        runtmp.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax, '-o', csv_base, '--output-format', 'krona', '--rank', '5')
+        runtmp.run_sourmash(
+            "tax",
+            "metagenome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "-o",
+            csv_base,
+            "--output-format",
+            "krona",
+            "--rank",
+            "5",
+        )
 
     print(str(exc.value))
-    assert "Invalid '--rank'/'--position' input: '5'. Please choose: 'strain', 'species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom'" in runtmp.last_result.err
+    assert (
+        "Invalid '--rank'/'--position' input: '5'. Please choose: 'strain', 'species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom'"
+        in runtmp.last_result.err
+    )
 
 
 def test_genome_no_rank_krona(runtmp):
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
     csv_base = "out"
 
     with pytest.raises(SourmashCommandFailed) as exc:
-        runtmp.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax, '-o', csv_base, '--output-format', 'krona')
+        runtmp.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "-o",
+            csv_base,
+            "--output-format",
+            "krona",
+        )
 
     assert "ERROR: Rank (--rank) is required for krona output formats" in str(exc.value)
 
 
 def test_metagenome_rank_not_available(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
 
     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax,
-                       '--rank', 'strain')
+        c.run_sourmash(
+            "tax", "metagenome", "-g", g_csv, "--taxonomy-csv", tax, "--rank", "strain"
+        )
 
     print(str(exc.value))
 
     assert c.last_result.status == -1
-    assert "No taxonomic information provided for rank strain: cannot summarize at this rank" in str(exc.value)
+    assert (
+        "No taxonomic information provided for rank strain: cannot summarize at this rank"
+        in str(exc.value)
+    )
 
 
 def test_metagenome_duplicated_taxonomy_fail(runtmp):
     c = runtmp
     # write temp taxonomy with duplicates
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     duplicated_csv = runtmp.output("duplicated_taxonomy.csv")
-    with open(duplicated_csv, 'w') as dup:
+    with open(duplicated_csv, "w") as dup:
         tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
-        tax.append(tax[1] + 'FOO') # add first tax_assign again
+        tax.append(tax[1] + "FOO")  # add first tax_assign again
         dup.write("\n".join(tax))
 
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
 
     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', duplicated_csv)
+        c.run_sourmash(
+            "tax", "metagenome", "-g", g_csv, "--taxonomy-csv", duplicated_csv
+        )
 
     assert "cannot read taxonomy" in str(exc.value)
     assert "multiple lineages for identifier GCF_001881345" in str(exc.value)
@@ -547,16 +1191,18 @@ def test_metagenome_duplicated_taxonomy_fail(runtmp):
 def test_metagenome_duplicated_taxonomy_force(runtmp):
     c = runtmp
     # write temp taxonomy with duplicates
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     duplicated_csv = runtmp.output("duplicated_taxonomy.csv")
-    with open(duplicated_csv, 'w') as dup:
+    with open(duplicated_csv, "w") as dup:
         tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
-        tax.append(tax[1]) # add first tax_assign again
+        tax.append(tax[1])  # add first tax_assign again
        dup.write("\n".join(tax))
 
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
 
-    c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', duplicated_csv, '--force')
+    c.run_sourmash(
+        "tax", "metagenome", "-g", g_csv, "--taxonomy-csv", duplicated_csv, "--force"
+    )
 
     print(c.last_result.status)
     print(c.last_result.out)
@@ -564,55 +1210,105 @@ def test_metagenome_duplicated_taxonomy_force(runtmp):
 
     # same as stdout test - just check the first few lines
     assert c.last_result.status == 0
-    assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,superkingdom,0.204,d__Bacteria,md5,test1.sig,0.131,1024000' in c.last_result.out
-    assert 'test1,superkingdom,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
-    assert 'test1,phylum,0.116,d__Bacteria;p__Bacteroidota,md5,test1.sig,0.073,582000' in c.last_result.out
-    assert 'test1,phylum,0.088,d__Bacteria;p__Proteobacteria,md5,test1.sig,0.058,442000' in c.last_result.out
-    assert 'test1,phylum,0.796,unclassified,md5,test1.sig,0.869,3990000' in c.last_result.out
+    assert (
+        "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,superkingdom,0.204,d__Bacteria,md5,test1.sig,0.131,1024000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,superkingdom,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,phylum,0.116,d__Bacteria;p__Bacteroidota,md5,test1.sig,0.073,582000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,phylum,0.088,d__Bacteria;p__Proteobacteria,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,phylum,0.796,unclassified,md5,test1.sig,0.869,3990000"
+        in c.last_result.out
+    )
 
 
 def test_metagenome_missing_taxonomy(runtmp):
     c = runtmp
     # write temp taxonomy with missing entry
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     subset_csv = runtmp.output("subset_taxonomy.csv")
-    with open(subset_csv, 'w') as subset:
+    with open(subset_csv, "w") as subset:
        tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
         subset.write("\n".join(tax[:4]))
 
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
 
-    c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', subset_csv)
+    c.run_sourmash("tax", "metagenome", "-g", g_csv, "--taxonomy-csv", subset_csv)
 
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)
 
     assert c.last_result.status == 0
-    assert "The following are missing from the taxonomy information: GCF_003471795" in c.last_result.err
-
-    assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,superkingdom,0.193,d__Bacteria,md5,test1.sig,0.124,970000'in c.last_result.out
-    assert 'test1,superkingdom,0.807,unclassified,md5,test1.sig,0.876,4044000' in c.last_result.out
-    assert 'test1,phylum,0.105,d__Bacteria;p__Bacteroidota,md5,test1.sig,0.066,528000' in c.last_result.out
-    assert 'test1,phylum,0.088,d__Bacteria;p__Proteobacteria,md5,test1.sig,0.058,442000' in c.last_result.out
-    assert 'test1,phylum,0.807,unclassified,md5,test1.sig,0.876,4044000' in c.last_result.out
-    assert 'test1,class,0.105,d__Bacteria;p__Bacteroidota;c__Bacteroidia,md5,test1.sig,0.066,528000' in c.last_result.out
+    assert (
+        "The following are missing from the taxonomy information: GCF_003471795"
+        in c.last_result.err
+    )
+
+    assert (
+        "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,superkingdom,0.193,d__Bacteria,md5,test1.sig,0.124,970000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,superkingdom,0.807,unclassified,md5,test1.sig,0.876,4044000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,phylum,0.105,d__Bacteria;p__Bacteroidota,md5,test1.sig,0.066,528000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,phylum,0.088,d__Bacteria;p__Proteobacteria,md5,test1.sig,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,phylum,0.807,unclassified,md5,test1.sig,0.876,4044000"
+        in c.last_result.out
+    )
+    assert (
+        "test1,class,0.105,d__Bacteria;p__Bacteroidota;c__Bacteroidia,md5,test1.sig,0.066,528000"
+        in c.last_result.out
+    )
 
 
 def test_metagenome_missing_fail_taxonomy(runtmp):
     c = runtmp
     # write temp taxonomy with missing entry
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     subset_csv = runtmp.output("subset_taxonomy.csv")
-    with open(subset_csv, 'w') as subset:
+    with open(subset_csv, "w") as subset:
         tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
         subset.write("\n".join(tax[:4]))
 
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
 
     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', subset_csv, '--fail-on-missing-taxonomy')
+        c.run_sourmash(
+            "tax",
+            "metagenome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            subset_csv,
+            "--fail-on-missing-taxonomy",
+        )
 
     print(str(exc.value))
@@ -624,162 +1320,315 @@ def test_metagenome_missing_fail_taxonomy(runtmp):
 def test_metagenome_multiple_taxonomy_files_missing(runtmp):
     c = runtmp
     # write temp taxonomy with duplicates
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
 
     # gather against mult databases
-    g_csv = utils.get_test_data('tax/test1_x_gtdbrs202_genbank_euks.gather.csv')
+    g_csv = utils.get_test_data("tax/test1_x_gtdbrs202_genbank_euks.gather.csv")
 
-    c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', taxonomy_csv, '--force')
+    c.run_sourmash(
+        "tax", "metagenome", "-g", g_csv, "--taxonomy-csv", taxonomy_csv, "--force"
+    )
 
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)
 
-    assert "of 6 gather results, lineage assignments for 2 results were missed" in c.last_result.err
-    assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'multtest,superkingdom,0.204,d__Bacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.131,1024000' in c.last_result.out
-    assert 'multtest,superkingdom,0.796,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.869,3990000' in c.last_result.out
-    assert 'multtest,phylum,0.116,d__Bacteria;p__Bacteroidota,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000' in c.last_result.out
-    assert 'multtest,phylum,0.088,d__Bacteria;p__Proteobacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.058,442000' in c.last_result.out
-    assert 'multtest,phylum,0.796,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.869,3990000' in c.last_result.out
-    assert 'multtest,class,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000' in c.last_result.out
-    assert 'multtest,class,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.058,442000' in c.last_result.out
-    assert 'multtest,class,0.796,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.869,3990000' in c.last_result.out
+    assert (
+        "of 6 gather results, lineage assignments for 2 results were missed"
+        in c.last_result.err
+    )
+    assert (
+        "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,superkingdom,0.204,d__Bacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.131,1024000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,superkingdom,0.796,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,phylum,0.116,d__Bacteria;p__Bacteroidota,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,phylum,0.088,d__Bacteria;p__Proteobacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,phylum,0.796,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.869,3990000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,class,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,class,0.088,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,class,0.796,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.869,3990000"
+        in c.last_result.out
+    )
 
 
 def test_metagenome_multiple_taxonomy_files(runtmp):
     c = runtmp
     # write temp taxonomy with duplicates
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
-    protozoa_genbank = utils.get_test_data('tax/protozoa_genbank_lineage.csv')
-    bacteria_refseq = utils.get_test_data('tax/bacteria_refseq_lineage.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
+    protozoa_genbank = utils.get_test_data("tax/protozoa_genbank_lineage.csv")
+    bacteria_refseq = utils.get_test_data("tax/bacteria_refseq_lineage.csv")
 
     # gather against mult databases
-    g_csv = utils.get_test_data('tax/test1_x_gtdbrs202_genbank_euks.gather.csv')
-
-    c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', taxonomy_csv, protozoa_genbank, bacteria_refseq)
+    g_csv = utils.get_test_data("tax/test1_x_gtdbrs202_genbank_euks.gather.csv")
+
+    c.run_sourmash(
+        "tax",
+        "metagenome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        taxonomy_csv,
+        protozoa_genbank,
+        bacteria_refseq,
+    )
 
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)
 
-    assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'multtest,superkingdom,0.204,Bacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.131,1024000' in c.last_result.out
-    assert 'multtest,superkingdom,0.051,Eukaryota,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.245,258000' in c.last_result.out
-    assert 'multtest,superkingdom,0.744,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.624,3732000' in c.last_result.out
-    assert 'multtest,phylum,0.116,Bacteria;Bacteroidetes,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000' in c.last_result.out
-    assert 'multtest,phylum,0.088,Bacteria;Proteobacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.058,442000' in c.last_result.out
-    assert 'multtest,phylum,0.051,Eukaryota;Apicomplexa,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.245,258000' in c.last_result.out
-    assert 'multtest,phylum,0.744,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.624,3732000' in c.last_result.out
-    assert 'multtest,class,0.116,Bacteria;Bacteroidetes;Bacteroidia,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000' in c.last_result.out
+    assert (
+        "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,superkingdom,0.204,Bacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.131,1024000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,superkingdom,0.051,Eukaryota,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.245,258000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,superkingdom,0.744,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.624,3732000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,phylum,0.116,Bacteria;Bacteroidetes,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,phylum,0.088,Bacteria;Proteobacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.058,442000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,phylum,0.051,Eukaryota;Apicomplexa,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.245,258000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,phylum,0.744,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.624,3732000"
+        in c.last_result.out
+    )
+    assert (
+        "multtest,class,0.116,Bacteria;Bacteroidetes;Bacteroidia,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000"
+        in c.last_result.out
+    )
 
 
 def test_metagenome_multiple_taxonomy_files_multiple_taxonomy_args(runtmp):
     c = runtmp
     # pass in
mult tax files using mult tax arguments - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - protozoa_genbank = utils.get_test_data('tax/protozoa_genbank_lineage.csv') - bacteria_refseq = utils.get_test_data('tax/bacteria_refseq_lineage.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + protozoa_genbank = utils.get_test_data("tax/protozoa_genbank_lineage.csv") + bacteria_refseq = utils.get_test_data("tax/bacteria_refseq_lineage.csv") # gather against mult databases - g_csv = utils.get_test_data('tax/test1_x_gtdbrs202_genbank_euks.gather.csv') - - c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', taxonomy_csv, '-t', protozoa_genbank, '-t', bacteria_refseq) + g_csv = utils.get_test_data("tax/test1_x_gtdbrs202_genbank_euks.gather.csv") + + c.run_sourmash( + "tax", + "metagenome", + "-g", + g_csv, + "--taxonomy-csv", + taxonomy_csv, + "-t", + protozoa_genbank, + "-t", + bacteria_refseq, + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) - assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out - assert 'multtest,superkingdom,0.204,Bacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.131,1024000' in c.last_result.out - assert 'multtest,superkingdom,0.051,Eukaryota,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.245,258000' in c.last_result.out - assert 'multtest,superkingdom,0.744,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.624,3732000' in c.last_result.out - assert 'multtest,phylum,0.116,Bacteria;Bacteroidetes,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000' in c.last_result.out - assert 'multtest,phylum,0.088,Bacteria;Proteobacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.058,442000' in c.last_result.out - assert 'multtest,phylum,0.051,Eukaryota;Apicomplexa,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.245,258000' in c.last_result.out - assert 'multtest,phylum,0.744,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.624,3732000' in c.last_result.out - assert 'multtest,class,0.116,Bacteria;Bacteroidetes;Bacteroidia,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000' in c.last_result.out + assert ( + "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) + assert ( + "multtest,superkingdom,0.204,Bacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.131,1024000" + in c.last_result.out + ) + assert ( + "multtest,superkingdom,0.051,Eukaryota,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.245,258000" + in c.last_result.out + ) + assert ( + "multtest,superkingdom,0.744,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.624,3732000" + in c.last_result.out + ) + assert ( + "multtest,phylum,0.116,Bacteria;Bacteroidetes,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000" + in c.last_result.out + ) + assert ( + "multtest,phylum,0.088,Bacteria;Proteobacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.058,442000" + in c.last_result.out + ) + assert ( + "multtest,phylum,0.051,Eukaryota;Apicomplexa,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.245,258000" + in c.last_result.out + ) + assert ( + "multtest,phylum,0.744,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.624,3732000" + in c.last_result.out + ) + assert ( + 
"multtest,class,0.116,Bacteria;Bacteroidetes;Bacteroidia,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000" + in c.last_result.out + ) def test_metagenome_multiple_taxonomy_files_multiple_taxonomy_args_empty_force(runtmp): # pass in mult tax files using mult tax arguments, with one empty, # and use --force c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - protozoa_genbank = utils.get_test_data('tax/protozoa_genbank_lineage.csv') - bacteria_refseq = utils.get_test_data('tax/bacteria_refseq_lineage.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + protozoa_genbank = utils.get_test_data("tax/protozoa_genbank_lineage.csv") + bacteria_refseq = utils.get_test_data("tax/bacteria_refseq_lineage.csv") - tax_empty = runtmp.output('t.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax_empty = runtmp.output("t.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") with open(tax_empty, "w") as fp: fp.write("") print("t_csv: ", tax_empty) # gather against mult databases - g_csv = utils.get_test_data('tax/test1_x_gtdbrs202_genbank_euks.gather.csv') - - c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', taxonomy_csv, '-t', protozoa_genbank, '-t', bacteria_refseq, '-t', tax_empty, '--force') + g_csv = utils.get_test_data("tax/test1_x_gtdbrs202_genbank_euks.gather.csv") + + c.run_sourmash( + "tax", + "metagenome", + "-g", + g_csv, + "--taxonomy-csv", + taxonomy_csv, + "-t", + protozoa_genbank, + "-t", + bacteria_refseq, + "-t", + tax_empty, + "--force", + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) - assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out - assert 'multtest,superkingdom,0.204,Bacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.131,1024000' in c.last_result.out - assert 'multtest,superkingdom,0.051,Eukaryota,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.245,258000' in c.last_result.out - assert 'multtest,superkingdom,0.744,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.624,3732000' in c.last_result.out - assert 'multtest,phylum,0.116,Bacteria;Bacteroidetes,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000' in c.last_result.out - assert 'multtest,phylum,0.088,Bacteria;Proteobacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.058,442000' in c.last_result.out - assert 'multtest,phylum,0.051,Eukaryota;Apicomplexa,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.245,258000' in c.last_result.out - assert 'multtest,phylum,0.744,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.624,3732000' in c.last_result.out - assert 'multtest,class,0.116,Bacteria;Bacteroidetes;Bacteroidia,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000' in c.last_result.out + assert ( + "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) + assert ( + "multtest,superkingdom,0.204,Bacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.131,1024000" + in c.last_result.out + ) + assert ( + "multtest,superkingdom,0.051,Eukaryota,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.245,258000" + in c.last_result.out + ) + assert ( + "multtest,superkingdom,0.744,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.624,3732000" + in c.last_result.out + ) + assert ( + 
"multtest,phylum,0.116,Bacteria;Bacteroidetes,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000" + in c.last_result.out + ) + assert ( + "multtest,phylum,0.088,Bacteria;Proteobacteria,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.058,442000" + in c.last_result.out + ) + assert ( + "multtest,phylum,0.051,Eukaryota;Apicomplexa,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.245,258000" + in c.last_result.out + ) + assert ( + "multtest,phylum,0.744,unclassified,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.624,3732000" + in c.last_result.out + ) + assert ( + "multtest,class,0.116,Bacteria;Bacteroidetes;Bacteroidia,9687eeed,outputs/abundtrim/HSMA33MX.abundtrim.fq.gz,0.073,582000" + in c.last_result.out + ) def test_metagenome_empty_gather_results(runtmp): - tax = utils.get_test_data('tax/test.taxonomy.csv') + tax = utils.get_test_data("tax/test.taxonomy.csv") - #creates empty gather result - g_csv = runtmp.output('g.csv') + # creates empty gather result + g_csv = runtmp.output("g.csv") with open(g_csv, "w") as fp: fp.write("") print("g_csv: ", g_csv) with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax) + runtmp.run_sourmash("tax", "metagenome", "-g", g_csv, "--taxonomy-csv", tax) - assert f"Cannot read gather results from '{g_csv}'. Is file empty?" in str(exc.value) + assert f"Cannot read gather results from '{g_csv}'. Is file empty?" in str( + exc.value + ) assert runtmp.last_result.status == -1 def test_metagenome_bad_gather_header(runtmp): - tax = utils.get_test_data('tax/test.taxonomy.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax = utils.get_test_data("tax/test.taxonomy.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") - bad_g_csv = runtmp.output('g.csv') + bad_g_csv = runtmp.output("g.csv") - #creates bad gather result - bad_g = [x.replace("query_bp", "nope") + "\n" for x in Path(g_csv).read_text().splitlines()] - with open(bad_g_csv, 'w') as fp: + # creates bad gather result + bad_g = [ + x.replace("query_bp", "nope") + "\n" + for x in Path(g_csv).read_text().splitlines() + ] + with open(bad_g_csv, "w") as fp: fp.writelines(bad_g) print("bad_gather_results: \n", bad_g) with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('tax', 'metagenome', '-g', bad_g_csv, '--taxonomy-csv', tax) + runtmp.run_sourmash("tax", "metagenome", "-g", bad_g_csv, "--taxonomy-csv", tax) print(str(exc.value)) - assert 'is missing columns needed for taxonomic summarization.' in str(exc.value) + assert "is missing columns needed for taxonomic summarization." 
in str(exc.value) assert runtmp.last_result.status == -1 def test_metagenome_empty_tax_lineage_input(runtmp): # test an empty tax CSV - tax_empty = runtmp.output('t.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax_empty = runtmp.output("t.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") with open(tax_empty, "w") as fp: fp.write("") print("t_csv: ", tax_empty) - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax_empty) + runtmp.run_sourmash( + "tax", "metagenome", "-g", g_csv, "--taxonomy-csv", tax_empty + ) print(runtmp.last_result.status) print(runtmp.last_result.out) @@ -791,16 +1640,17 @@ def test_metagenome_empty_tax_lineage_input(runtmp): def test_metagenome_empty_tax_lineage_input_force(runtmp): # test an empty tax CSV with --force - tax_empty = runtmp.output('t.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax_empty = runtmp.output("t.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") with open(tax_empty, "w") as fp: fp.write("") print("t_csv: ", tax_empty) - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax_empty, '--force') + runtmp.run_sourmash( + "tax", "metagenome", "-g", g_csv, "--taxonomy-csv", tax_empty, "--force" + ) print(runtmp.last_result.status) print(runtmp.last_result.out) @@ -811,17 +1661,17 @@ def test_metagenome_empty_tax_lineage_input_force(runtmp): def test_metagenome_perfect_match_warning(runtmp): - tax = utils.get_test_data('tax/test.taxonomy.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax = utils.get_test_data("tax/test.taxonomy.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") - perfect_g_csv = runtmp.output('g.csv') + perfect_g_csv = runtmp.output("g.csv") - #create a perfect gather result - with open(g_csv, 'r') as fp: - r = csv.DictReader(fp, delimiter=',') + # create a perfect gather result + with open(g_csv) as fp: + r = csv.DictReader(fp, delimiter=",") header = r.fieldnames print(header) - with open(perfect_g_csv, 'w') as out_fp: + with open(perfect_g_csv, "w") as out_fp: w = csv.DictWriter(out_fp, header) w.writeheader() for n, row in enumerate(r): @@ -834,28 +1684,31 @@ def test_metagenome_perfect_match_warning(runtmp): w.writerow(row) print(row) - runtmp.run_sourmash('tax', 'metagenome', '-g', perfect_g_csv, '--taxonomy-csv', tax) + runtmp.run_sourmash("tax", "metagenome", "-g", perfect_g_csv, "--taxonomy-csv", tax) print(runtmp.last_result.status) print(runtmp.last_result.out) print(runtmp.last_result.err) assert runtmp.last_result.status == 0 - assert "WARNING: 100% match! Is query 'test1' identical to its database match, 'GCF_001881345'?" in runtmp.last_result.err + assert ( + "WARNING: 100% match! Is query 'test1' identical to its database match, 'GCF_001881345'?" 
+ in runtmp.last_result.err + ) def test_metagenome_over100percent_error(runtmp): - tax = utils.get_test_data('tax/test.taxonomy.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax = utils.get_test_data("tax/test.taxonomy.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") - perfect_g_csv = runtmp.output('g.csv') + perfect_g_csv = runtmp.output("g.csv") - #create a perfect gather result - with open(g_csv, 'r') as fp: - r = csv.DictReader(fp, delimiter=',') + # create a perfect gather result + with open(g_csv) as fp: + r = csv.DictReader(fp, delimiter=",") header = r.fieldnames print(header) - with open(perfect_g_csv, 'w') as out_fp: + with open(perfect_g_csv, "w") as out_fp: w = csv.DictWriter(out_fp, header) w.writeheader() for n, row in enumerate(r): @@ -866,49 +1719,72 @@ def test_metagenome_over100percent_error(runtmp): print(row) with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('tax', 'metagenome', '-g', perfect_g_csv, '--taxonomy-csv', tax) + runtmp.run_sourmash( + "tax", "metagenome", "-g", perfect_g_csv, "--taxonomy-csv", tax + ) print(runtmp.last_result.status) print(runtmp.last_result.out) print(runtmp.last_result.err) assert runtmp.last_result.status == -1 - assert "fraction is > 100% of the query! This should not be possible." in runtmp.last_result.err + assert ( + "fraction is > 100% of the query! This should not be possible." + in runtmp.last_result.err + ) def test_metagenome_gather_duplicate_query(runtmp): c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") # different filename, contents identical to test1 g_res2 = runtmp.output("test2.gather.csv") - with open(g_res2, 'w') as fp: + with open(g_res2, "w") as fp: fp.write(Path(g_res).read_text()) with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('tax', 'metagenome', '--gather-csv', g_res, g_res2, - '--taxonomy-csv', taxonomy_csv) + c.run_sourmash( + "tax", + "metagenome", + "--gather-csv", + g_res, + g_res2, + "--taxonomy-csv", + taxonomy_csv, + ) assert c.last_result.status == -1 print(str(exc.value)) - assert "Gather query test1 was found in more than one CSV. Cannot load from " in str(exc.value) + assert ( + "Gather query test1 was found in more than one CSV. Cannot load from " + in str(exc.value) + ) def test_metagenome_gather_duplicate_query_force(runtmp): # do not load same query from multiple files. 
c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") # different filename, contents identical to test1 g_res2 = runtmp.output("test2.gather.csv") - with open(g_res2, 'w') as fp: + with open(g_res2, "w") as fp: fp.write(Path(g_res).read_text()) - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('tax', 'metagenome', '--gather-csv', g_res, g_res2, - '--taxonomy-csv', taxonomy_csv, '--force') + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "tax", + "metagenome", + "--gather-csv", + g_res, + g_res2, + "--taxonomy-csv", + taxonomy_csv, + "--force", + ) print(c.last_result.status) print(c.last_result.out) @@ -923,18 +1799,27 @@ def test_metagenome_gather_duplicate_query_force(runtmp): def test_metagenome_two_queries_human_output(runtmp): # do not load same query from multiple files. c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") # make a second query with same output g_res2 = runtmp.output("test2.gather.csv") - with open(g_res2, 'w') as fp: + with open(g_res2, "w") as fp: for line in Path(g_res).read_text().splitlines(): - line = line.replace('test1', 'test2') + "\n" + line = line.replace("test1", "test2") + "\n" fp.write(line) - c.run_sourmash('tax', 'metagenome', '--gather-csv', g_res, g_res2, - '--taxonomy-csv', taxonomy_csv, '-F', "human") + c.run_sourmash( + "tax", + "metagenome", + "--gather-csv", + g_res, + g_res2, + "--taxonomy-csv", + taxonomy_csv, + "-F", + "human", + ) print(c.last_result.status) print(c.last_result.out) @@ -942,9 +1827,15 @@ def test_metagenome_two_queries_human_output(runtmp): assert c.last_result.status == 0 assert "test1 86.9% - unclassified" in c.last_result.out - assert "test1 5.8% 92.5% d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli" in c.last_result.out + assert ( + "test1 5.8% 92.5% d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli" + in c.last_result.out + ) assert "test2 86.9% - unclassified" in c.last_result.out - assert "test2 5.8% 92.5% d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli" in c.last_result.out + assert ( + "test2 5.8% 92.5% d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli" + in c.last_result.out + ) assert "test2 5.7% 92.5% d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri" assert "test2 1.6% 89.1% d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus" @@ -952,22 +1843,36 @@ def test_metagenome_two_queries_human_output(runtmp): def test_metagenome_two_queries_with_single_query_output_formats_fail(runtmp): # fail on multiple queries with single query output formats c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = 
utils.get_test_data("tax/test1.gather.csv") # make a second query with same output g_res2 = runtmp.output("test2.gather.csv") - with open(g_res2, 'w') as fp: + with open(g_res2, "w") as fp: for line in Path(g_res).read_text().splitlines(): - line = line.replace('test1', 'test2') + "\n" + line = line.replace("test1", "test2") + "\n" fp.write(line) csv_summary_out = runtmp.output("tst.summarized.csv") kreport_out = runtmp.output("tst.kreport.txt") with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('tax', 'metagenome', '--gather-csv', g_res, g_res2, - '--taxonomy-csv', taxonomy_csv, '-F', "csv_summary", "kreport", "--rank", "phylum", "-o", "tst") + c.run_sourmash( + "tax", + "metagenome", + "--gather-csv", + g_res, + g_res2, + "--taxonomy-csv", + taxonomy_csv, + "-F", + "csv_summary", + "kreport", + "--rank", + "phylum", + "-o", + "tst", + ) print(str(exc.value)) assert not os.path.exists(csv_summary_out) @@ -975,29 +1880,47 @@ def test_metagenome_two_queries_with_single_query_output_formats_fail(runtmp): assert c.last_result.status == -1 assert "loaded results for 2 queries from 2 gather CSVs" in c.last_result.err - assert "WARNING: found results for multiple gather queries. Can only output multi-query result formats: skipping csv_summary, kreport" in c.last_result.err + assert ( + "WARNING: found results for multiple gather queries. Can only output multi-query result formats: skipping csv_summary, kreport" + in c.last_result.err + ) assert "ERROR: No output formats remaining." in c.last_result.err def test_metagenome_two_queries_skip_single_query_output_formats(runtmp): # remove single-query outputs when working with multiple queries c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") # make a second query with same output g_res2 = runtmp.output("test2.gather.csv") - with open(g_res2, 'w') as fp: + with open(g_res2, "w") as fp: for line in Path(g_res).read_text().splitlines(): - line = line.replace('test1', 'test2') + "\n" + line = line.replace("test1", "test2") + "\n" fp.write(line) csv_summary_out = runtmp.output("tst.summarized.csv") kreport_out = runtmp.output("tst.kreport.txt") lineage_summary_out = runtmp.output("tst.lineage_summary.tsv") - c.run_sourmash('tax', 'metagenome', '--gather-csv', g_res, g_res2, - '--taxonomy-csv', taxonomy_csv, '-F', "csv_summary", "kreport", "lineage_summary", "--rank", "phylum", "-o", "tst") + c.run_sourmash( + "tax", + "metagenome", + "--gather-csv", + g_res, + g_res2, + "--taxonomy-csv", + taxonomy_csv, + "-F", + "csv_summary", + "kreport", + "lineage_summary", + "--rank", + "phylum", + "-o", + "tst", + ) assert not os.path.exists(csv_summary_out) assert not os.path.exists(kreport_out) @@ -1005,32 +1928,52 @@ def test_metagenome_two_queries_skip_single_query_output_formats(runtmp): assert c.last_result.status == 0 assert "loaded results for 2 queries from 2 gather CSVs" in c.last_result.err - assert "WARNING: found results for multiple gather queries. Can only output multi-query result formats: skipping csv_summary, kreport" in c.last_result.err + assert ( + "WARNING: found results for multiple gather queries. Can only output multi-query result formats: skipping csv_summary, kreport" + in c.last_result.err + ) def test_metagenome_two_queries_krona(runtmp): # for now, we enable multi-query krona. Is this desired? 
c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") # make a second query with same output g_res2 = runtmp.output("test2.gather.csv") - with open(g_res2, 'w') as fp: + with open(g_res2, "w") as fp: for line in Path(g_res).read_text().splitlines(): - line = line.replace('test1', 'test2') + "\n" + line = line.replace("test1", "test2") + "\n" fp.write(line) - c.run_sourmash('tax', 'metagenome', '--gather-csv', g_res, g_res2, - '--taxonomy-csv', taxonomy_csv, '-F', "krona", '--rank', 'superkingdom') + c.run_sourmash( + "tax", + "metagenome", + "--gather-csv", + g_res, + g_res2, + "--taxonomy-csv", + taxonomy_csv, + "-F", + "krona", + "--rank", + "superkingdom", + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert "WARNING: results from more than one query found. Krona summarization not recommended." in c.last_result.err - assert "Percentage assignment will be normalized by the number of queries to maintain range 0-100%" in c.last_result.err + assert ( + "WARNING: results from more than one query found. Krona summarization not recommended." + in c.last_result.err + ) + assert ( + "Percentage assignment will be normalized by the number of queries to maintain range 0-100%" + in c.last_result.err + ) assert "fraction superkingdom" in c.last_result.out assert "0.2042281611487834 d__Bacteria" in c.last_result.out assert "0.7957718388512166 unclassified" in c.last_result.out @@ -1040,108 +1983,150 @@ def test_metagenome_gather_duplicate_filename(runtmp): # test that a duplicate filename is properly flagged, when passed in # twice to a single -g argument. 
c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') - - c.run_sourmash('tax', 'metagenome', '--gather-csv', g_res, g_res, '--taxonomy-csv', taxonomy_csv) + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") + + c.run_sourmash( + "tax", + "metagenome", + "--gather-csv", + g_res, + g_res, + "--taxonomy-csv", + taxonomy_csv, + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert f'ignoring duplicated reference to file: {g_res}' - assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out - assert 'test1,superkingdom,0.204,d__Bacteria,md5,test1.sig,0.131,1024000' in c.last_result.out + assert f"ignoring duplicated reference to file: {g_res}" in c.last_result.err + assert ( + "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) + assert ( + "test1,superkingdom,0.204,d__Bacteria,md5,test1.sig,0.131,1024000" + in c.last_result.out + ) def test_metagenome_gather_duplicate_filename_2(runtmp): # test that a duplicate filename is properly flagged, with -g a -g b c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') - - c.run_sourmash('tax', 'metagenome', '--gather-csv', g_res, '-g', g_res, '--taxonomy-csv', taxonomy_csv) + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") + + c.run_sourmash( + "tax", + "metagenome", + "--gather-csv", + g_res, + "-g", + g_res, + "--taxonomy-csv", + taxonomy_csv, + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert f'ignoring duplicated reference to file: {g_res}' - assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out - assert 'test1,superkingdom,0.204,d__Bacteria,md5,test1.sig,0.131,1024000' in c.last_result.out + assert f"ignoring duplicated reference to file: {g_res}" in c.last_result.err + assert ( + "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) + assert ( + "test1,superkingdom,0.204,d__Bacteria,md5,test1.sig,0.131,1024000" + in c.last_result.out + ) def test_metagenome_gather_duplicate_filename_from_file(runtmp): c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") g_from_file = runtmp.output("tmp-from-file.txt") - with open(g_from_file, 'w') as f_csv: + with open(g_from_file, "w") as f_csv: f_csv.write(f"{g_res}\n") f_csv.write(f"{g_res}\n") - c.run_sourmash('tax', 'metagenome', '--from-file', g_from_file, '--taxonomy-csv', taxonomy_csv) + c.run_sourmash( + "tax", "metagenome", "--from-file", g_from_file, "--taxonomy-csv", taxonomy_csv + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert f'ignoring duplicated reference to file: {g_res}' - assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out - assert 'test1,superkingdom,0.204,d__Bacteria,md5,test1.sig,0.131,1024000' in c.last_result.out + assert 
f"ignoring duplicated reference to file: {g_res}" + assert ( + "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) + assert ( + "test1,superkingdom,0.204,d__Bacteria,md5,test1.sig,0.131,1024000" + in c.last_result.out + ) def test_genome_empty_gather_results(runtmp): - tax = utils.get_test_data('tax/test.taxonomy.csv') + tax = utils.get_test_data("tax/test.taxonomy.csv") - #creates empty gather result - g_csv = runtmp.output('g.csv') + # creates empty gather result + g_csv = runtmp.output("g.csv") with open(g_csv, "w") as fp: fp.write("") print("g_csv: ", g_csv) with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax) + runtmp.run_sourmash("tax", "genome", "-g", g_csv, "--taxonomy-csv", tax) assert runtmp.last_result.status == -1 print(runtmp.last_result.err) print(runtmp.last_result.out) - assert f"Cannot read gather results from '{g_csv}'. Is file empty?" in str(exc.value) + assert f"Cannot read gather results from '{g_csv}'. Is file empty?" in str( + exc.value + ) def test_genome_bad_gather_header(runtmp): - tax = utils.get_test_data('tax/test.taxonomy.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax = utils.get_test_data("tax/test.taxonomy.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") - bad_g_csv = runtmp.output('g.csv') + bad_g_csv = runtmp.output("g.csv") - #creates bad gather result - bad_g = [x.replace("f_unique_to_query", "nope") + "\n" for x in Path(g_csv).read_text().splitlines()] - with open(bad_g_csv, 'w') as fp: + # creates bad gather result + bad_g = [ + x.replace("f_unique_to_query", "nope") + "\n" + for x in Path(g_csv).read_text().splitlines() + ] + with open(bad_g_csv, "w") as fp: fp.writelines(bad_g) print("bad_gather_results: \n", bad_g) with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('tax', 'genome', '-g', bad_g_csv, '--taxonomy-csv', tax) + runtmp.run_sourmash("tax", "genome", "-g", bad_g_csv, "--taxonomy-csv", tax) - assert 'is missing columns needed for taxonomic summarization.' in str(exc.value) + assert "is missing columns needed for taxonomic summarization." 
in str(exc.value) assert runtmp.last_result.status == -1 def test_genome_empty_tax_lineage_input(runtmp): # test an empty tax csv - tax_empty = runtmp.output('t.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax_empty = runtmp.output("t.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") with open(tax_empty, "w") as fp: fp.write("") print("t_csv: ", tax_empty) with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax_empty) + runtmp.run_sourmash("tax", "genome", "-g", g_csv, "--taxonomy-csv", tax_empty) print(runtmp.last_result.status) print(runtmp.last_result.out) @@ -1155,66 +2140,124 @@ def test_genome_rank_stdout_0(runtmp): # test basic genome c = runtmp - g_csv = utils.get_test_data('tax/test1.gather.csv') - tax = utils.get_test_data('tax/test.taxonomy.csv') - - c.run_sourmash('tax', 'genome', '--gather-csv', g_csv, '--taxonomy-csv', tax, - '--rank', 'species', '--containment-threshold', '0') + g_csv = utils.get_test_data("tax/test1.gather.csv") + tax = utils.get_test_data("tax/test.taxonomy.csv") + + c.run_sourmash( + "tax", + "genome", + "--gather-csv", + g_csv, + "--taxonomy-csv", + tax, + "--rank", + "species", + "--containment-threshold", + "0", + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out - assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out + assert ( + "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) + assert ( + "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000" + in c.last_result.out + ) def test_genome_rank_stdout_0_db(runtmp): # test basic genome with sqlite database c = runtmp - g_csv = utils.get_test_data('tax/test1.gather.csv') - tax = utils.get_test_data('tax/test.taxonomy.db') - - c.run_sourmash('tax', 'genome', '--gather-csv', g_csv, '--taxonomy-csv', - tax, '--rank', 'species', '--containment-threshold', '0') + g_csv = utils.get_test_data("tax/test1.gather.csv") + tax = utils.get_test_data("tax/test.taxonomy.db") + + c.run_sourmash( + "tax", + "genome", + "--gather-csv", + g_csv, + "--taxonomy-csv", + tax, + "--rank", + "species", + "--containment-threshold", + "0", + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out - assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out + assert ( + "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) + assert ( + "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000" + in c.last_result.out + ) # too stringent of containment threshold: - c.run_sourmash('tax', 'genome', '--gather-csv', g_csv, 
'--taxonomy-csv', - tax, '--rank', 'species', '--containment-threshold', '1.0') + c.run_sourmash( + "tax", + "genome", + "--gather-csv", + g_csv, + "--taxonomy-csv", + tax, + "--rank", + "species", + "--containment-threshold", + "1.0", + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert "test1,below_threshold,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000," in c.last_result.out + assert ( + "test1,below_threshold,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000," + in c.last_result.out + ) def test_genome_rank_csv_0(runtmp): # test basic genome - output csv c = runtmp - g_csv = utils.get_test_data('tax/test1.gather.csv') - tax = utils.get_test_data('tax/test.taxonomy.csv') + g_csv = utils.get_test_data("tax/test1.gather.csv") + tax = utils.get_test_data("tax/test.taxonomy.csv") csv_base = "out" cl_csv = csv_base + ".classifications.csv" csvout = runtmp.output(cl_csv) outdir = os.path.dirname(csvout) print("csvout: ", csvout) - c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax, - '--rank', 'species', '-o', csv_base, '--containment-threshold', '0', - '--output-dir', outdir) + c.run_sourmash( + "tax", + "genome", + "-g", + g_csv, + "--taxonomy-csv", + tax, + "--rank", + "species", + "-o", + csv_base, + "--containment-threshold", + "0", + "--output-dir", + outdir, + ) print(c.last_result.status) print(c.last_result.out) @@ -1223,25 +2266,46 @@ def test_genome_rank_csv_0(runtmp): assert f"saving 'classification' output to '{csvout}'" in runtmp.last_result.err assert c.last_result.status == 0 cl_results = [x.rstrip() for x in Path(csvout).read_text().splitlines()] - assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in cl_results[0] - assert 'test1,match,species,0.0885520542481053,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.05701254275940707,444000' in cl_results[1] + assert ( + "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in cl_results[0] + ) + assert ( + "test1,match,species,0.0885520542481053,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.05701254275940707,444000" + in cl_results[1] + ) def test_genome_rank_krona(runtmp): # test basic genome - output csv c = runtmp - g_csv = utils.get_test_data('tax/test1.gather.csv') - tax = utils.get_test_data('tax/test.taxonomy.csv') + g_csv = utils.get_test_data("tax/test1.gather.csv") + tax = utils.get_test_data("tax/test.taxonomy.csv") csv_base = "out" cl_csv = csv_base + ".krona.tsv" csvout = runtmp.output(cl_csv) outdir = os.path.dirname(csvout) print("csvout: ", csvout) - c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax, - '--rank', 'species', '-o', csv_base, '--containment-threshold', '0', - '--output-format', 'krona', '--output-dir', outdir) + c.run_sourmash( + "tax", + "genome", + "-g", + g_csv, + "--taxonomy-csv", + tax, + "--rank", + "species", + "-o", + csv_base, + "--containment-threshold", + "0", + "--output-format", + "krona", + "--output-dir", + outdir, + ) print(c.last_result.status) print(c.last_result.out) @@ -1249,26 +2313,59 @@ def 
test_genome_rank_krona(runtmp): assert f"saving 'krona' output to '{csvout}'" in runtmp.last_result.err assert c.last_result.status == 0 - kr_results = [x.rstrip().split('\t') for x in Path(csvout).read_text().splitlines()] + kr_results = [x.rstrip().split("\t") for x in Path(csvout).read_text().splitlines()] print(kr_results) - assert ['fraction', 'superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'] == kr_results[0] - assert ['0.0885520542481053', 'd__Bacteria', 'p__Bacteroidota', 'c__Bacteroidia', 'o__Bacteroidales', 'f__Bacteroidaceae', 'g__Prevotella', 's__Prevotella copri'] == kr_results[1] + assert [ + "fraction", + "superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + ] == kr_results[0] + assert [ + "0.0885520542481053", + "d__Bacteria", + "p__Bacteroidota", + "c__Bacteroidia", + "o__Bacteroidales", + "f__Bacteroidaceae", + "g__Prevotella", + "s__Prevotella copri", + ] == kr_results[1] def test_genome_rank_human_output(runtmp): # test basic genome - output csv c = runtmp - g_csv = utils.get_test_data('tax/test1.gather.csv') - tax = utils.get_test_data('tax/test.taxonomy.csv') + g_csv = utils.get_test_data("tax/test1.gather.csv") + tax = utils.get_test_data("tax/test.taxonomy.csv") csv_base = "out" - csvout = runtmp.output(csv_base + '.human.txt') + csvout = runtmp.output(csv_base + ".human.txt") outdir = os.path.dirname(csvout) print("csvout: ", csvout) - c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax, - '--rank', 'species', '-o', csv_base, '--containment-threshold', '0', - '--output-format', 'human', '--output-dir', outdir) + c.run_sourmash( + "tax", + "genome", + "-g", + g_csv, + "--taxonomy-csv", + tax, + "--rank", + "species", + "-o", + csv_base, + "--containment-threshold", + "0", + "--output-format", + "human", + "--output-dir", + outdir, + ) print(c.last_result.status) print(c.last_result.out) @@ -1282,27 +2379,45 @@ def test_genome_rank_human_output(runtmp): print(outp) assert len(outp) == 3 - outp = [ x.strip() for x in outp ] + outp = [x.strip() for x in outp] - assert outp[0] == 'sample name status proportion cANI lineage' - assert outp[1] == '----------- ------ ---------- ---- -------' - assert outp[2] == 'test1 match 5.7% 92.5% d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri' + assert outp[0] == "sample name status proportion cANI lineage" + assert outp[1] == "----------- ------ ---------- ---- -------" + assert ( + outp[2] + == "test1 match 5.7% 92.5% d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri" + ) def test_genome_rank_lineage_csv_output(runtmp): # test basic genome - output csv c = runtmp - g_csv = utils.get_test_data('tax/test1.gather.csv') - tax = utils.get_test_data('tax/test.taxonomy.csv') + g_csv = utils.get_test_data("tax/test1.gather.csv") + tax = utils.get_test_data("tax/test.taxonomy.csv") csv_base = "out" - csvout = runtmp.output(csv_base + '.lineage.csv') + csvout = runtmp.output(csv_base + ".lineage.csv") outdir = os.path.dirname(csvout) print("csvout: ", csvout) - c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax, - '--rank', 'species', '-o', csv_base, '--containment-threshold', '0', - '--output-format', 'lineage_csv', '--output-dir', outdir) + c.run_sourmash( + "tax", + "genome", + "-g", + g_csv, + "--taxonomy-csv", + tax, + "--rank", + "species", + "-o", + csv_base, + "--containment-threshold", + "0", + "--output-format", + 
"lineage_csv", + "--output-dir", + outdir, + ) print(c.last_result.status) print(c.last_result.out) @@ -1314,169 +2429,291 @@ def test_genome_rank_lineage_csv_output(runtmp): outp = fp.readlines() assert len(outp) == 2 - outp = [ x.strip() for x in outp ] + outp = [x.strip() for x in outp] - assert outp[0] == 'ident,superkingdom,phylum,class,order,family,genus,species' - assert outp[1] == 'test1,d__Bacteria,p__Bacteroidota,c__Bacteroidia,o__Bacteroidales,f__Bacteroidaceae,g__Prevotella,s__Prevotella copri' + assert outp[0] == "ident,superkingdom,phylum,class,order,family,genus,species" + assert ( + outp[1] + == "test1,d__Bacteria,p__Bacteroidota,c__Bacteroidia,o__Bacteroidales,f__Bacteroidaceae,g__Prevotella,s__Prevotella copri" + ) def test_genome_gather_from_file_rank(runtmp): c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") g_from_file = runtmp.output("tmp-from-file.txt") - with open(g_from_file, 'w') as f_csv: + with open(g_from_file, "w") as f_csv: f_csv.write(f"{g_res}\n") - c.run_sourmash('tax', 'genome', '--from-file', g_from_file, '--taxonomy-csv', taxonomy_csv, - '--rank', 'species', '--containment-threshold', '0') + c.run_sourmash( + "tax", + "genome", + "--from-file", + g_from_file, + "--taxonomy-csv", + taxonomy_csv, + "--rank", + "species", + "--containment-threshold", + "0", + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out - assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out + assert ( + "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) + assert ( + "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000" + in c.last_result.out + ) def test_genome_gather_two_files(runtmp): c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") # make test2 results (identical to test1 except query_name and filename) g_res2 = runtmp.output("test2.gather.csv") - test2_results = [x.replace("test1", "test2") + "\n" for x in Path(g_res).read_text().splitlines()] - with open(g_res2, 'w') as fp: + test2_results = [ + x.replace("test1", "test2") + "\n" for x in Path(g_res).read_text().splitlines() + ] + with open(g_res2, "w") as fp: fp.writelines(test2_results) - c.run_sourmash('tax', 'genome', '-g', g_res, g_res2, '--taxonomy-csv', taxonomy_csv, - '--rank', 'species', '--containment-threshold', '0') + c.run_sourmash( + "tax", + "genome", + "-g", + g_res, + g_res2, + "--taxonomy-csv", + taxonomy_csv, + "--rank", + "species", + "--containment-threshold", + "0", + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out 
- assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out - assert 'test2,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test2.sig,0.057,444000' in c.last_result.out + assert ( + "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) + assert ( + "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000" + in c.last_result.out + ) + assert ( + "test2,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test2.sig,0.057,444000" + in c.last_result.out + ) def test_genome_gather_two_files_empty_force(runtmp): # make test2 results (identical to test1 except query_name and filename) # add an empty file too, with --force -> should work c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") - g_empty_csv = runtmp.output('g_empty.csv') + g_empty_csv = runtmp.output("g_empty.csv") with open(g_empty_csv, "w") as fp: fp.write("") print("g_csv: ", g_empty_csv) g_res2 = runtmp.output("test2.gather.csv") - test2_results = [x.replace("test1", "test2") + "\n" for x in Path(g_res).read_text().splitlines()] - with open(g_res2, 'w') as fp: + test2_results = [ + x.replace("test1", "test2") + "\n" for x in Path(g_res).read_text().splitlines() + ] + with open(g_res2, "w") as fp: fp.writelines(test2_results) - c.run_sourmash('tax', 'genome', '-g', g_res, g_res2, '-g', g_empty_csv, - '--taxonomy-csv', taxonomy_csv, - '--rank', 'species', '--containment-threshold', '0', - '--force') + c.run_sourmash( + "tax", + "genome", + "-g", + g_res, + g_res2, + "-g", + g_empty_csv, + "--taxonomy-csv", + taxonomy_csv, + "--rank", + "species", + "--containment-threshold", + "0", + "--force", + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out - assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out - assert 'test2,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test2.sig,0.057,444000' in c.last_result.out + assert ( + "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) + assert ( + "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000" + in c.last_result.out + ) + assert ( + "test2,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test2.sig,0.057,444000" + in c.last_result.out + ) def test_genome_gather_duplicate_filename(runtmp): c = runtmp - taxonomy_csv = 
utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') - - c.run_sourmash('tax', 'genome', '--gather-csv', g_res, '-g', g_res, '--taxonomy-csv', taxonomy_csv, - '--rank', 'species', '--containment-threshold', '0') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") + + c.run_sourmash( + "tax", + "genome", + "--gather-csv", + g_res, + "-g", + g_res, + "--taxonomy-csv", + taxonomy_csv, + "--rank", + "species", + "--containment-threshold", + "0", + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert f'ignoring duplicated reference to file: {g_res}' - assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out - assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out + assert f"ignoring duplicated reference to file: {g_res}" in c.last_result.err + assert ( + "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) + assert ( + "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000" + in c.last_result.out + ) def test_genome_gather_from_file_duplicate_filename(runtmp): c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") g_from_file = runtmp.output("tmp-from-file.txt") - with open(g_from_file, 'w') as f_csv: + with open(g_from_file, "w") as f_csv: f_csv.write(f"{g_res}\n") f_csv.write(f"{g_res}\n") - c.run_sourmash('tax', 'genome', '--from-file', g_from_file, '--taxonomy-csv', taxonomy_csv, - '--rank', 'species', '--containment-threshold', '0') + c.run_sourmash( + "tax", + "genome", + "--from-file", + g_from_file, + "--taxonomy-csv", + taxonomy_csv, + "--rank", + "species", + "--containment-threshold", + "0", + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert f'ignoring duplicated reference to file: {g_res}' - assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out - assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out + assert f"ignoring duplicated reference to file: {g_res}" in c.last_result.err + assert ( + "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) + assert ( + "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000" + in c.last_result.out + ) def test_genome_gather_from_file_duplicate_query(runtmp): c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") # different filename, contents identical to test1 g_res2 = 
runtmp.output("test2.gather.csv") - with open(g_res2, 'w') as fp: + with open(g_res2, "w") as fp: fp.write(Path(g_res).read_text()) g_from_file = runtmp.output("tmp-from-file.txt") - with open(g_from_file, 'w') as f_csv: + with open(g_from_file, "w") as f_csv: f_csv.write(f"{g_res}\n") f_csv.write(f"{g_res2}\n") with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('tax', 'genome', '--from-file', g_from_file, '--taxonomy-csv', taxonomy_csv, - '--rank', 'species', '--containment-threshold', '0') + c.run_sourmash( + "tax", + "genome", + "--from-file", + g_from_file, + "--taxonomy-csv", + taxonomy_csv, + "--rank", + "species", + "--containment-threshold", + "0", + ) assert c.last_result.status == -1 print(str(exc.value)) - assert "Gather query test1 was found in more than one CSV. Cannot load from " in str(exc.value) + assert ( + "Gather query test1 was found in more than one CSV. Cannot load from " + in str(exc.value) + ) def test_genome_gather_from_file_duplicate_query_force(runtmp): c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") # different filename, contents identical to test1 g_res2 = runtmp.output("test2.gather.csv") - with open(g_res2, 'w') as fp: + with open(g_res2, "w") as fp: fp.write(Path(g_res).read_text()) g_from_file = runtmp.output("tmp-from-file.txt") - with open(g_from_file, 'w') as f_csv: + with open(g_from_file, "w") as f_csv: f_csv.write(f"{g_res}\n") f_csv.write(f"{g_res2}\n") - with pytest.raises(SourmashCommandFailed) as exc: - c.run_sourmash('tax', 'genome', '--from-file', g_from_file, '--taxonomy-csv', taxonomy_csv, - '--rank', 'species', '--containment-threshold', '0', '--force') + with pytest.raises(SourmashCommandFailed): + c.run_sourmash( + "tax", + "genome", + "--from-file", + g_from_file, + "--taxonomy-csv", + taxonomy_csv, + "--rank", + "species", + "--containment-threshold", + "0", + "--force", + ) print(c.last_result.status) print(c.last_result.out) @@ -1490,70 +2727,119 @@ def test_genome_gather_from_file_duplicate_query_force(runtmp): def test_genome_gather_cli_and_from_file(runtmp): c = runtmp - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - g_res = utils.get_test_data('tax/test1.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + g_res = utils.get_test_data("tax/test1.gather.csv") g_from_file = runtmp.output("tmp-from-file.txt") # make test2 results (identical to test1 except query_name) g_res2 = runtmp.output("test2.gather.csv") - test2_results = [x.replace("test1", "test2") + "\n" for x in Path(g_res).read_text().splitlines()] - with open(g_res2, 'w') as fp: + test2_results = [ + x.replace("test1", "test2") + "\n" for x in Path(g_res).read_text().splitlines() + ] + with open(g_res2, "w") as fp: fp.writelines(test2_results) # write test2 csv to a text file for input g_from_file = runtmp.output("tmp-from-file.txt") - with open(g_from_file, 'w') as f_csv: + with open(g_from_file, "w") as f_csv: f_csv.write(f"{g_res2}\n") - c.run_sourmash('tax', 'genome', '-g', g_res, '--from-file', g_from_file, '--taxonomy-csv', taxonomy_csv, - '--rank', 'species', '--containment-threshold', '0') + c.run_sourmash( + "tax", + "genome", + "-g", + g_res, + "--from-file", + g_from_file, + "--taxonomy-csv", + taxonomy_csv, + "--rank", + "species", + "--containment-threshold", + "0", + ) print(c.last_result.status) 
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out
-    assert 'test2,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test2.sig,0.057,444000' in c.last_result.out
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )
+    assert (
+        "test2,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test2.sig,0.057,444000"
+        in c.last_result.out
+    )


 def test_genome_gather_cli_and_from_file_duplicate_filename(runtmp):
     c = runtmp
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
-    g_res = utils.get_test_data('tax/test1.gather.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
+    g_res = utils.get_test_data("tax/test1.gather.csv")
     g_from_file = runtmp.output("tmp-from-file.txt")

     # also write test1 csv to a text file for input
     g_from_file = runtmp.output("tmp-from-file.txt")
-    with open(g_from_file, 'w') as f_csv:
+    with open(g_from_file, "w") as f_csv:
         f_csv.write(f"{g_res}\n")

-    c.run_sourmash('tax', 'genome', '-g', g_res, '--from-file', g_from_file, '--taxonomy-csv', taxonomy_csv,
-                   '--rank', 'species', '--containment-threshold', '0')
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        g_res,
+        "--from-file",
+        g_from_file,
+        "--taxonomy-csv",
+        taxonomy_csv,
+        "--rank",
+        "species",
+        "--containment-threshold",
+        "0",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert f'ignoring duplicated reference to file: {g_res}' in c.last_result.err
-    assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out
+    assert f"ignoring duplicated reference to file: {g_res}" in c.last_result.err
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )


 def test_genome_gather_from_file_below_threshold(runtmp):
     # What do we want the results from this to be? I think I initially thought we shouldn't report anything,
     # but wouldn't a "below_threshold" + superkingdom result (here, 0.204) be helpful information?
     c = runtmp
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
-    g_res = utils.get_test_data('tax/test1.gather.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
+    g_res = utils.get_test_data("tax/test1.gather.csv")

     g_from_file = runtmp.output("tmp-from-file.txt")
-    with open(g_from_file, 'w') as f_csv:
+    with open(g_from_file, "w") as f_csv:
         f_csv.write(f"{g_res}\n")

-    c.run_sourmash('tax', 'genome', '--from-file', g_from_file, '--taxonomy-csv', taxonomy_csv,
-                   '--containment-threshold', '1')
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "--from-file",
+        g_from_file,
+        "--taxonomy-csv",
+        taxonomy_csv,
+        "--containment-threshold",
+        "1",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -1565,53 +2851,75 @@ def test_genome_gather_from_file_below_threshold(runtmp):


 def test_genome_gather_two_queries(runtmp):
-    '''
+    """
     This checks for initial bug where classification would only happen for one
     genome per rank when doing --containment-threshold classification
-    '''
+    """
     c = runtmp
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
-    g_res = utils.get_test_data('tax/47+63_x_gtdb-rs202.gather.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
+    g_res = utils.get_test_data("tax/47+63_x_gtdb-rs202.gather.csv")

     # split 47+63 into two fake queries: q47, q63
     g_res2 = runtmp.output("two-queries.gather.csv")
     q2_results = [x + "\n" for x in Path(g_res).read_text().splitlines()]
     # rename queries
-    q2_results[1] = q2_results[1].replace('47+63', 'q47')
-    q2_results[2] = q2_results[2].replace('47+63', 'q63')
-    with open(g_res2, 'w') as fp:
+    q2_results[1] = q2_results[1].replace("47+63", "q47")
+    q2_results[2] = q2_results[2].replace("47+63", "q63")
+    with open(g_res2, "w") as fp:
         for line in q2_results:
             print(line)
             fp.write(line)

-    c.run_sourmash('tax', 'genome', '-g', g_res2, '--taxonomy-csv', taxonomy_csv,
-                   '--containment-threshold', '0')
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        g_res2,
+        "--taxonomy-csv",
+        taxonomy_csv,
+        "--containment-threshold",
+        "0",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
     assert "query_name,status,rank,fraction,lineage" in c.last_result.out
-    assert "q63,match,species,0.336,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Shewanellaceae;g__Shewanella;s__Shewanella baltica,491c0a81," in c.last_result.out
-    assert "q47,match,species,0.664,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Shewanellaceae;g__Shewanella;s__Shewanella baltica," in c.last_result.out
+    assert (
+        "q63,match,species,0.336,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Shewanellaceae;g__Shewanella;s__Shewanella baltica,491c0a81,"
+        in c.last_result.out
+    )
+    assert (
+        "q47,match,species,0.664,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Shewanellaceae;g__Shewanella;s__Shewanella baltica,"
+        in c.last_result.out
+    )


 def test_genome_rank_duplicated_taxonomy_fail(runtmp):
     c = runtmp
     # write temp taxonomy with duplicates
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     duplicated_csv = runtmp.output("duplicated_taxonomy.csv")
-    with open(duplicated_csv, 'w') as dup:
+    with open(duplicated_csv, "w") as dup:
         tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
-        tax.append(tax[1] + 'FOO') # add first tax_assign again
+        tax.append(tax[1] + "FOO")  # add first tax_assign again
         dup.write("\n".join(tax))

-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")

     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', duplicated_csv,
-                       '--rank', 'species')
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            duplicated_csv,
+            "--rank",
+            "species",
+        )

     assert "cannot read taxonomy assignments" in str(exc.value)
     assert "multiple lineages for identifier GCF_001881345" in str(exc.value)

@@ -1620,16 +2928,16 @@ def test_genome_rank_duplicated_taxonomy_fail_lineages(runtmp):
     # write temp taxonomy with duplicates => lineages-style file
     c = runtmp

-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     taxdb = tax_utils.LineageDB.load(taxonomy_csv)
     for k, v in taxdb.items():
         print(k, v)

-    lineage_csv = runtmp.output('lin.csv')
-    with open(lineage_csv, 'w', newline="") as fp:
+    lineage_csv = runtmp.output("lin.csv")
+    with open(lineage_csv, "w", newline="") as fp:
         w = csv.writer(fp)
-        w.writerow(['name', 'lineage'])
+        w.writerow(["name", "lineage"])
         for k, v in taxdb.items():
             linstr = lca_utils.display_lineage(v)
             w.writerow([k, linstr])
@@ -1640,7 +2948,7 @@ def test_genome_rank_duplicated_taxonomy_fail_lineages(runtmp):
             w.writerow([k, linstr])

     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'summarize', lineage_csv)
+        c.run_sourmash("tax", "summarize", lineage_csv)

     print(c.last_result.out)
     print(c.last_result.err)
@@ -1651,174 +2959,292 @@ def test_genome_rank_duplicated_taxonomy_fail_lineages(runtmp):


 def test_genome_rank_duplicated_taxonomy_force(runtmp):
     # write temp taxonomy with duplicates
     c = runtmp
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     duplicated_csv = runtmp.output("duplicated_taxonomy.csv")
-    with open(duplicated_csv, 'w') as dup:
+    with open(duplicated_csv, "w") as dup:
         tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
-        tax.append(tax[1]) # add first tax_assign again
+        tax.append(tax[1])  # add first tax_assign again
         dup.write("\n".join(tax))

-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', duplicated_csv,
-                   '--rank', 'species', '--force', '--containment-threshold', '0')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        duplicated_csv,
+        "--rank",
+        "species",
+        "--force",
+        "--containment-threshold",
+        "0",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )


 def test_genome_missing_taxonomy_ignore_threshold(runtmp):
     c = runtmp
     # write temp taxonomy with missing entry
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     subset_csv = runtmp.output("subset_taxonomy.csv")
-    with open(subset_csv, 'w') as subset:
+    with open(subset_csv, "w") as subset:
         tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
-        tax = [tax[0]] + tax[2:] # remove the best match (1st tax entry)
+        tax = [tax[0]] + tax[2:]  # remove the best match (1st tax entry)
         subset.write("\n".join(tax))

-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', subset_csv, '--containment-threshold', '0')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        subset_csv,
+        "--containment-threshold",
+        "0",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert "The following are missing from the taxonomy information: GCF_001881345" in c.last_result.err
-    assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out
+    assert (
+        "The following are missing from the taxonomy information: GCF_001881345"
+        in c.last_result.err
+    )
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )


 def test_genome_missing_taxonomy_recover_with_second_tax_file(runtmp):
     c = runtmp
     # write temp taxonomy with missing entry
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     subset_csv = runtmp.output("subset_taxonomy.csv")
-    with open(subset_csv, 'w') as subset:
+    with open(subset_csv, "w") as subset:
        tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
-        tax = [tax[0]] + tax[2:] # remove the best match (1st tax entry)
+        tax = [tax[0]] + tax[2:]  # remove the best match (1st tax entry)
         subset.write("\n".join(tax))

-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', subset_csv, '-t', taxonomy_csv, '--containment-threshold', '0')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        subset_csv,
+        "-t",
+        taxonomy_csv,
+        "--containment-threshold",
+        "0",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert "The following are missing from the taxonomy information: GCF_001881345" not in c.last_result.err
-    assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out
+    assert (
+        "The following are missing from the taxonomy information: GCF_001881345"
+        not in c.last_result.err
+    )
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )


 def test_genome_missing_taxonomy_ignore_rank(runtmp):
     c = runtmp
     # write temp taxonomy with missing entry
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     subset_csv = runtmp.output("subset_taxonomy.csv")
-    with open(subset_csv, 'w') as subset:
+    with open(subset_csv, "w") as subset:
         tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
-        tax = [tax[0]] + tax[2:] # remove the best match (1st tax entry)
+        tax = [tax[0]] + tax[2:]  # remove the best match (1st tax entry)
         subset.write("\n".join(tax))

-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")

-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', subset_csv, '--rank', 'species')
+    c.run_sourmash(
+        "tax", "genome", "-g", g_csv, "--taxonomy-csv", subset_csv, "--rank", "species"
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert "The following are missing from the taxonomy information: GCF_001881345" in c.last_result.err
-    assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,below_threshold,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out
+    assert (
+        "The following are missing from the taxonomy information: GCF_001881345"
+        in c.last_result.err
+    )
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,below_threshold,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )


 def test_genome_multiple_taxonomy_files(runtmp):
     c = runtmp
     # write temp taxonomy with missing entry
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     subset_csv = runtmp.output("subset_taxonomy.csv")
-    with open(subset_csv, 'w') as subset:
+    with open(subset_csv, "w") as subset:
         tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
-        tax = [tax[0]] + tax[2:] # remove the best match (1st tax entry)
+        tax = [tax[0]] + tax[2:]  # remove the best match (1st tax entry)
         subset.write("\n".join(tax))

-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")

     # using mult -t args
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', subset_csv, '-t', taxonomy_csv)
+    c.run_sourmash(
+        "tax", "genome", "-g", g_csv, "--taxonomy-csv", subset_csv, "-t", taxonomy_csv
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert "The following are missing from the taxonomy information: GCF_001881345" not in c.last_result.err
-    assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,match,family,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.073,582000,' in c.last_result.out
+    assert (
+        "The following are missing from the taxonomy information: GCF_001881345"
+        not in c.last_result.err
+    )
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,family,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.073,582000,"
+        in c.last_result.out
+    )

     # using single -t arg
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', subset_csv, taxonomy_csv)
+    c.run_sourmash(
+        "tax", "genome", "-g", g_csv, "--taxonomy-csv", subset_csv, taxonomy_csv
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert "The following are missing from the taxonomy information: GCF_001881345" not in c.last_result.err
-    assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,match,family,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.073,582000,' in c.last_result.out
+    assert (
+        "The following are missing from the taxonomy information: GCF_001881345"
+        not in c.last_result.err
+    )
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,family,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.073,582000,"
+        in c.last_result.out
+    )


 def test_genome_multiple_taxonomy_files_empty_force(runtmp):
     c = runtmp
     # write temp taxonomy with missing entry, as well as an empty file,
     # and use force
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     subset_csv = runtmp.output("subset_taxonomy.csv")
-    with open(subset_csv, 'w') as subset:
+    with open(subset_csv, "w") as subset:
         tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
-        tax = [tax[0]] + tax[2:] # remove the best match (1st tax entry)
+        tax = [tax[0]] + tax[2:]  # remove the best match (1st tax entry)
         subset.write("\n".join(tax))

-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")

-    empty_tax = runtmp.output('tax_empty.txt')
+    empty_tax = runtmp.output("tax_empty.txt")
     with open(empty_tax, "w") as fp:
         fp.write("")
-
+
     # using mult -t args
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', subset_csv, '-t', taxonomy_csv, '-t', empty_tax, '--force')
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        subset_csv,
+        "-t",
+        taxonomy_csv,
+        "-t",
+        empty_tax,
+        "--force",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert "The following are missing from the taxonomy information: GCF_001881345" not in c.last_result.err
-    assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,match,family,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.073,582000,' in c.last_result.out
+    assert (
+        "The following are missing from the taxonomy information: GCF_001881345"
+        not in c.last_result.err
+    )
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,family,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.073,582000,"
+        in c.last_result.out
+    )


 def test_genome_missing_taxonomy_fail_threshold(runtmp):
     c = runtmp
     # write temp taxonomy with missing entry
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     subset_csv = runtmp.output("subset_taxonomy.csv")
-    with open(subset_csv, 'w') as subset:
+    with open(subset_csv, "w") as subset:
         tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
-        tax = [tax[0]] + tax[2:] # remove the best match (1st tax entry)
+        tax = [tax[0]] + tax[2:]  # remove the best match (1st tax entry)
         subset.write("\n".join(tax))

-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")

     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', subset_csv,
-                       '--fail-on-missing-taxonomy', '--containment-threshold', '0')
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            subset_csv,
+            "--fail-on-missing-taxonomy",
+            "--containment-threshold",
+            "0",
+        )

     print(str(exc.value))
     print(c.last_result.status)
@@ -1833,18 +3259,27 @@ def test_genome_missing_taxonomy_fail_threshold(runtmp):


 def test_genome_missing_taxonomy_fail_rank(runtmp):
     c = runtmp
     # write temp taxonomy with missing entry
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     subset_csv = runtmp.output("subset_taxonomy.csv")
-    with open(subset_csv, 'w') as subset:
+    with open(subset_csv, "w") as subset:
         tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
-        tax = [tax[0]] + tax[2:] # remove the best match (1st tax entry)
+        tax = [tax[0]] + tax[2:]  # remove the best match (1st tax entry)
         subset.write("\n".join(tax))

-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")

     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', subset_csv,
-                       '--fail-on-missing-taxonomy', '--rank', 'species')
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            subset_csv,
+            "--fail-on-missing-taxonomy",
+            "--rank",
+            "species",
+        )

     print(str(exc.value))
     print(c.last_result.status)
@@ -1859,12 +3294,22 @@ def test_genome_missing_taxonomy_fail_rank(runtmp):


 def test_genome_rank_not_available(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")

     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax,
-                       '--rank', 'strain', '--containment-threshold', '0')
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "--rank",
+            "strain",
+            "--containment-threshold",
+            "0",
+        )

     print(str(exc.value))
     print(c.last_result.status)
@@ -1872,22 +3317,32 @@ def test_genome_rank_not_available(runtmp):
     print(c.last_result.err)

     assert c.last_result.status == -1
-    assert "No taxonomic information provided for rank strain: cannot classify at this rank" in str(exc.value)
+    assert (
+        "No taxonomic information provided for rank strain: cannot classify at this rank"
+        in str(exc.value)
+    )


 def test_genome_empty_gather_results_with_header_single(runtmp):
     c = runtmp
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")

-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
     gather_results = [x for x in Path(g_csv).read_text().splitlines()]
-    empty_gather_with_header = runtmp.output('g_header.csv')
+    empty_gather_with_header = runtmp.output("g_header.csv")

     # write temp empty gather results (header only)
     with open(empty_gather_with_header, "w") as fp:
         fp.write(gather_results[0])

     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', empty_gather_with_header, '--taxonomy-csv', taxonomy_csv)
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            empty_gather_with_header,
+            "--taxonomy-csv",
+            taxonomy_csv,
+        )

     print(str(exc.value))
     print(c.last_result.status)
@@ -1895,44 +3350,48 @@ def test_genome_empty_gather_results_with_header_single(runtmp):
     print(c.last_result.err)

     assert c.last_result.status == -1
-    assert f'No gather results loaded from {empty_gather_with_header}.' in str(exc.value)
-    assert 'Exiting.' in str(exc.value)
+    assert f"No gather results loaded from {empty_gather_with_header}." in str(
+        exc.value
+    )
+    assert "Exiting." in str(exc.value)


 def test_genome_empty_gather_results_single(runtmp):
     c = runtmp
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")

     # write temp empty gather results
-    empty_tax = runtmp.output('tax_header.csv')
+    empty_tax = runtmp.output("tax_header.csv")
     with open(empty_tax, "w") as fp:
         fp.write("")

     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', empty_tax, '--taxonomy-csv', taxonomy_csv)
-
+        c.run_sourmash("tax", "genome", "-g", empty_tax, "--taxonomy-csv", taxonomy_csv)

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == -1
-    assert f"Cannot read gather results from '{empty_tax}'. Is file empty?" in str(exc.value)
-    assert 'Exiting.' in c.last_result.err
+    assert f"Cannot read gather results from '{empty_tax}'. Is file empty?" in str(
+        exc.value
+    )
+    assert "Exiting." in c.last_result.err


 def test_genome_empty_gather_results_single_force(runtmp):
     c = runtmp
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")

     # write temp empty gather results (header only)
-    empty_tax = runtmp.output('tax_header.csv')
+    empty_tax = runtmp.output("tax_header.csv")
     with open(empty_tax, "w") as fp:
         fp.write("")

     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', empty_tax, '--taxonomy-csv', taxonomy_csv,
-                       '--force')
+        c.run_sourmash(
+            "tax", "genome", "-g", empty_tax, "--taxonomy-csv", taxonomy_csv, "--force"
+        )

     print(str(exc.value))
     print(c.last_result.status)
@@ -1940,26 +3399,40 @@ def test_genome_empty_gather_results_single_force(runtmp):
     print(c.last_result.err)

     assert c.last_result.status == -1
-    assert '--force is set. Attempting to continue to next set of gather results.' in str(exc.value)
-    assert 'No results for classification. Exiting.' in str(exc.value)
+    assert (
+        "--force is set. Attempting to continue to next set of gather results."
+        in str(exc.value)
+    )
+    assert "No results for classification. Exiting." in str(exc.value)


 def test_genome_empty_gather_results_with_empty_csv_force(runtmp):
     c = runtmp
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")

     # write temp empty gather results
-    empty_tax = runtmp.output('tax_empty.txt')
+    empty_tax = runtmp.output("tax_empty.txt")
     with open(empty_tax, "w") as fp:
         fp.write("")

     g_from_file = runtmp.output("tmp-from-csv.csv")
-    with open(g_from_file, 'w') as f_csv:
+    with open(g_from_file, "w") as f_csv:
         f_csv.write(f"{empty_tax}\n")

     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', empty_tax, '--from-file', g_from_file,
-                       '--taxonomy-csv', taxonomy_csv, '--rank', 'species', '--force')
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            empty_tax,
+            "--from-file",
+            g_from_file,
+            "--taxonomy-csv",
+            taxonomy_csv,
+            "--rank",
+            "species",
+            "--force",
+        )

     print(str(exc.value))
     print(c.last_result.status)
@@ -1967,48 +3440,80 @@ def test_genome_empty_gather_results_with_empty_csv_force(runtmp):
     print(c.last_result.err)

     assert c.last_result.status == -1
-    assert '--force is set. Attempting to continue to next set of gather results.' in str(exc.value)
-    assert 'No results for classification. Exiting.' in str(exc.value)
+    assert (
+        "--force is set. Attempting to continue to next set of gather results."
+        in str(exc.value)
+    )
+    assert "No results for classification. Exiting." in str(exc.value)


 def test_genome_empty_gather_results_with_csv_force(runtmp):
     c = runtmp
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")

-    g_res = utils.get_test_data('tax/test1.gather.csv')
+    g_res = utils.get_test_data("tax/test1.gather.csv")
     g_from_file = runtmp.output("tmp-from-file.txt")
-    with open(g_from_file, 'w') as f_csv:
+    with open(g_from_file, "w") as f_csv:
         f_csv.write(f"{g_res}\n")

     # write temp empty gather results
-    empty_tax = runtmp.output('tax_empty.csv')
+    empty_tax = runtmp.output("tax_empty.csv")
     with open(empty_tax, "w") as fp:
         fp.write("")

-    c.run_sourmash('tax', 'genome', '-g', empty_tax, '--from-file', g_from_file,
-                   '--taxonomy-csv', taxonomy_csv, '--rank', 'species',
-                   '--containment-threshold', '0', '--force')
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        empty_tax,
+        "--from-file",
+        g_from_file,
+        "--taxonomy-csv",
+        taxonomy_csv,
+        "--rank",
+        "species",
+        "--containment-threshold",
+        "0",
+        "--force",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert '--force is set. Attempting to continue to next set of gather results.' in c.last_result.err
-    assert 'loaded results for 1 queries from 1 gather CSVs' in c.last_result.err
-    assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out
+    assert (
+        "--force is set. Attempting to continue to next set of gather results."
+        in c.last_result.err
+    )
+    assert "loaded results for 1 queries from 1 gather CSVs" in c.last_result.err
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )


 def test_genome_containment_threshold_bounds(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")

     below_threshold = "-1"
     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', tax, '--taxonomy-csv', tax,
-                       '--containment-threshold', below_threshold)
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            tax,
+            "--taxonomy-csv",
+            tax,
+            "--containment-threshold",
+            below_threshold,
+        )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2017,8 +3522,16 @@ def test_genome_containment_threshold_bounds(runtmp):

     above_threshold = "1.1"
     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax,
-                       '--containment-threshold', above_threshold)
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "--containment-threshold",
+            above_threshold,
+        )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2028,13 +3541,21 @@ def test_genome_containment_threshold_bounds(runtmp):

 def test_genome_containment_threshold_type(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")

     not_a_float = "str"
     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax,
-                       '--containment-threshold', not_a_float)
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "--containment-threshold",
+            not_a_float,
+        )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2043,17 +3564,17 @@ def test_genome_containment_threshold_type(runtmp):


 def test_genome_over100percent_error(runtmp):
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
+    g_csv = utils.get_test_data("tax/test1.gather.csv")

-    perfect_g_csv = runtmp.output('g.csv')
+    perfect_g_csv = runtmp.output("g.csv")

-    #create an impossible gather result
-    with open(g_csv, 'r') as fp:
-        r = csv.DictReader(fp, delimiter=',')
+    # create an impossible gather result
+    with open(g_csv) as fp:
+        r = csv.DictReader(fp, delimiter=",")
         header = r.fieldnames
         print(header)
-        with open(perfect_g_csv, 'w') as out_fp:
+        with open(perfect_g_csv, "w") as out_fp:
             w = csv.DictWriter(out_fp, header)
             w.writeheader()
             for n, row in enumerate(r):
@@ -2063,25 +3584,36 @@ def test_genome_over100percent_error(runtmp):
                 print(row)

     with pytest.raises(SourmashCommandFailed):
-        runtmp.run_sourmash('tax', 'genome', '-g', perfect_g_csv, '--taxonomy-csv', tax)
+        runtmp.run_sourmash("tax", "genome", "-g", perfect_g_csv, "--taxonomy-csv", tax)

     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
     print(runtmp.last_result.err)

     assert runtmp.last_result.status == -1
-    assert "fraction is > 100% of the query! This should not be possible." in runtmp.last_result.err
+    assert (
+        "fraction is > 100% of the query! This should not be possible."
+        in runtmp.last_result.err
+    )


 def test_genome_ani_threshold_input_errors(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather_old.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather_old.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")

     below_threshold = "-1"
     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', tax, '--taxonomy-csv', tax,
-                       '--ani-threshold', below_threshold)
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            tax,
+            "--taxonomy-csv",
+            tax,
+            "--ani-threshold",
+            below_threshold,
+        )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2090,8 +3622,16 @@ def test_genome_ani_threshold_input_errors(runtmp):

     above_threshold = "1.1"
     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax,
-                       '--ani-threshold', above_threshold)
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "--ani-threshold",
+            above_threshold,
+        )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2101,8 +3641,16 @@ def test_genome_ani_threshold_input_errors(runtmp):

     not_a_float = "str"
     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax,
-                       '--ani-threshold', not_a_float)
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "--ani-threshold",
+            not_a_float,
+        )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2112,49 +3660,76 @@ def test_genome_ani_threshold_input_errors(runtmp):


 def test_genome_ani_threshold(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")

-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax,
-                   '--ani-threshold', "0.93")  # note: I think this was previously a bug, if 0.95 produced the result below...
+    c.run_sourmash(
+        "tax", "genome", "-g", g_csv, "--taxonomy-csv", tax, "--ani-threshold", "0.93"
+    )  # note: I think this was previously a bug, if 0.95 produced the result below...

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert 'query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out
-    assert 'test1,match,family,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.073,582000,0.93' in c.last_result.out
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,family,0.116,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae,md5,test1.sig,0.073,582000,0.93"
+        in c.last_result.out
+    )

     # more lax threshold
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax,
-                   '--ani-threshold', "0.9")
+    c.run_sourmash(
+        "tax", "genome", "-g", g_csv, "--taxonomy-csv", tax, "--ani-threshold", "0.9"
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert 'test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000' in c.last_result.out
+    assert (
+        "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )

     # too stringent of threshold (using rank)
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax,
-                   '--ani-threshold', "1.0", '--rank', 'species')
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "--ani-threshold",
+        "1.0",
+        "--rank",
+        "species",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)
-    assert "test1,below_threshold,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000,0.92" in c.last_result.out
+    assert (
+        "test1,below_threshold,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000,0.92"
+        in c.last_result.out
+    )


 def test_genome_ani_oldgather(runtmp):
     # now fail if using gather <4.4
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather_old.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather_old.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")

     with pytest.raises(SourmashCommandFailed) as exc:
-        c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax)
-    assert "is missing columns needed for taxonomic summarization. Please run gather with sourmash >= 4.4." in str(exc.value)
+        c.run_sourmash("tax", "genome", "-g", g_csv, "--taxonomy-csv", tax)
+    assert (
+        "is missing columns needed for taxonomic summarization. Please run gather with sourmash >= 4.4."
+        in str(exc.value)
+    )

     assert c.last_result.status == -1

@@ -2164,11 +3739,10 @@ def test_genome_ani_lemonade_classify(runtmp):
     c = runtmp

     ## first run gather
-    genome = utils.get_test_data('tax/lemonade-MAG3.sig.gz')
-    matches = utils.get_test_data('tax/lemonade-MAG3.x.gtdb.matches.zip')
+    genome = utils.get_test_data("tax/lemonade-MAG3.sig.gz")
+    matches = utils.get_test_data("tax/lemonade-MAG3.x.gtdb.matches.zip")

-    c.run_sourmash('gather', genome, matches,
-                   '--threshold-bp=5000', '-o', 'gather.csv')
+    c.run_sourmash("gather", genome, matches, "--threshold-bp=5000", "-o", "gather.csv")

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2176,29 +3750,55 @@ def test_genome_ani_lemonade_classify(runtmp):

     assert c.last_result.status == 0

-    this_gather_file = c.output('gather.csv')
+    this_gather_file = c.output("gather.csv")
     this_gather = Path(this_gather_file).read_text().splitlines()
     assert len(this_gather) == 4

     ## now run 'tax genome' with human output
-    taxonomy_file = utils.get_test_data('tax/lemonade-MAG3.x.gtdb.matches.tax.csv')
-    c.run_sourmash('tax', 'genome', '-g', this_gather_file, '-t', taxonomy_file,
-                   '--ani', '0.8', '-F', 'human')
+    taxonomy_file = utils.get_test_data("tax/lemonade-MAG3.x.gtdb.matches.tax.csv")
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        this_gather_file,
+        "-t",
+        taxonomy_file,
+        "--ani",
+        "0.8",
+        "-F",
+        "human",
+    )

     output = c.last_result.out
-    assert 'MAG3_1 match 5.3% 91.0% d__Bacteria;p__Bacteroidota;c__Chlorobia;o__Chlorobiales;f__Chlorobiaceae;g__Prosthecochloris;s__Prosthecochloris vibrioformis' in output
+    assert (
+        "MAG3_1 match 5.3% 91.0% d__Bacteria;p__Bacteroidota;c__Chlorobia;o__Chlorobiales;f__Chlorobiaceae;g__Prosthecochloris;s__Prosthecochloris vibrioformis"
+        in output
+    )

     # aaand classify to lineage_csv
-    c.run_sourmash('tax', 'genome', '-g', this_gather_file, '-t', taxonomy_file,
-                   '--ani', '0.8', '-F', 'lineage_csv')
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        this_gather_file,
+        "-t",
+        taxonomy_file,
+        "--ani",
+        "0.8",
+        "-F",
+        "lineage_csv",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     output = c.last_result.out
-    assert 'ident,superkingdom,phylum,class,order,family,genus,species' in output
-    assert 'MAG3_1,d__Bacteria,p__Bacteroidota,c__Chlorobia,o__Chlorobiales,f__Chlorobiaceae,g__Prosthecochloris,s__Prosthecochloris vibrioformis' in output
+    assert "ident,superkingdom,phylum,class,order,family,genus,species" in output
+    assert (
+        "MAG3_1,d__Bacteria,p__Bacteroidota,c__Chlorobia,o__Chlorobiales,f__Chlorobiaceae,g__Prosthecochloris,s__Prosthecochloris vibrioformis"
+        in output
+    )


 def test_genome_ani_lemonade_classify_estimate_ani_ci(runtmp):
@@ -2207,11 +3807,18 @@
     c = runtmp

     ## first run gather
-    genome = utils.get_test_data('tax/lemonade-MAG3.sig.gz')
-    matches = utils.get_test_data('tax/lemonade-MAG3.x.gtdb.matches.zip')
-
-    c.run_sourmash('gather', genome, matches,
-                   '--threshold-bp=5000', '-o', 'gather.csv', '--estimate-ani')
+    genome = utils.get_test_data("tax/lemonade-MAG3.sig.gz")
+    matches = utils.get_test_data("tax/lemonade-MAG3.x.gtdb.matches.zip")
+
+    c.run_sourmash(
+        "gather",
+        genome,
+        matches,
+        "--threshold-bp=5000",
+        "-o",
+        "gather.csv",
+        "--estimate-ani",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2219,36 +3826,62 @@ def test_genome_ani_lemonade_classify_estimate_ani_ci(runtmp):

     assert c.last_result.status == 0

-    this_gather_file = c.output('gather.csv')
+    this_gather_file = c.output("gather.csv")
     this_gather = Path(this_gather_file).read_text().splitlines()
     assert len(this_gather) == 4

     ## now run 'tax genome' with human output
-    taxonomy_file = utils.get_test_data('tax/lemonade-MAG3.x.gtdb.matches.tax.csv')
-    c.run_sourmash('tax', 'genome', '-g', this_gather_file, '-t', taxonomy_file,
-                   '--ani', '0.8', '-F', 'human')
+    taxonomy_file = utils.get_test_data("tax/lemonade-MAG3.x.gtdb.matches.tax.csv")
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        this_gather_file,
+        "-t",
+        taxonomy_file,
+        "--ani",
+        "0.8",
+        "-F",
+        "human",
+    )

     output = c.last_result.out
-    assert 'MAG3_1 match 5.3% 91.0% d__Bacteria;p__Bacteroidota;c__Chlorobia;o__Chlorobiales;f__Chlorobiaceae;g__Prosthecochloris;s__Prosthecochloris vibrioformis' in output
+    assert (
+        "MAG3_1 match 5.3% 91.0% d__Bacteria;p__Bacteroidota;c__Chlorobia;o__Chlorobiales;f__Chlorobiaceae;g__Prosthecochloris;s__Prosthecochloris vibrioformis"
+        in output
+    )

     # aaand classify to lineage_csv
-    c.run_sourmash('tax', 'genome', '-g', this_gather_file, '-t', taxonomy_file,
-                   '--ani', '0.8', '-F', 'lineage_csv')
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        this_gather_file,
+        "-t",
+        taxonomy_file,
+        "--ani",
+        "0.8",
+        "-F",
+        "lineage_csv",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     output = c.last_result.out
-    assert 'ident,superkingdom,phylum,class,order,family,genus,species' in output
-    assert 'MAG3_1,d__Bacteria,p__Bacteroidota,c__Chlorobia,o__Chlorobiales,f__Chlorobiaceae,g__Prosthecochloris,s__Prosthecochloris vibrioformis' in output
+    assert "ident,superkingdom,phylum,class,order,family,genus,species" in output
+    assert (
+        "MAG3_1,d__Bacteria,p__Bacteroidota,c__Chlorobia,o__Chlorobiales,f__Chlorobiaceae,g__Prosthecochloris,s__Prosthecochloris vibrioformis"
+        in output
+    )


 def test_metagenome_no_gather_csv(runtmp):
     # test tax metagenome with no -g
-    taxonomy_file = utils.get_test_data('tax/lemonade-MAG3.x.gtdb.matches.tax.csv')
-    with pytest.raises(SourmashCommandFailed) as exc:
-        runtmp.run_sourmash('tax', 'metagenome', '-t', taxonomy_file)
+    taxonomy_file = utils.get_test_data("tax/lemonade-MAG3.x.gtdb.matches.tax.csv")
+    with pytest.raises(SourmashCommandFailed):
+        runtmp.run_sourmash("tax", "metagenome", "-t", taxonomy_file)

     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
@@ -2257,9 +3890,9 @@ def test_metagenome_no_gather_csv(runtmp):


 def test_genome_no_gather_csv(runtmp):
     # test tax genome with no -g
-    taxonomy_file = utils.get_test_data('tax/lemonade-MAG3.x.gtdb.matches.tax.csv')
-    with pytest.raises(SourmashCommandFailed) as exc:
-        runtmp.run_sourmash('tax', 'genome', '-t', taxonomy_file)
+    taxonomy_file = utils.get_test_data("tax/lemonade-MAG3.x.gtdb.matches.tax.csv")
+    with pytest.raises(SourmashCommandFailed):
+        runtmp.run_sourmash("tax", "genome", "-t", taxonomy_file)

     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
@@ -2268,9 +3901,9 @@ def test_genome_no_gather_csv(runtmp):


 def test_annotate_no_gather_csv(runtmp):
     # test tax annotate with no -g
-    taxonomy_file = utils.get_test_data('tax/lemonade-MAG3.x.gtdb.matches.tax.csv')
-    with pytest.raises(SourmashCommandFailed) as exc:
-        runtmp.run_sourmash('tax', 'annotate', '-t', taxonomy_file)
+    taxonomy_file = utils.get_test_data("tax/lemonade-MAG3.x.gtdb.matches.tax.csv")
+    with pytest.raises(SourmashCommandFailed):
+        runtmp.run_sourmash("tax", "annotate", "-t", taxonomy_file)

     print(runtmp.last_result.status)
     print(runtmp.last_result.out)
@@ -2281,89 +3914,165 @@ def test_genome_LIN(runtmp):
     # test basic genome with LIN taxonomy
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.LIN-taxonomy.csv')
-
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax, '--lins', '--ani-threshold', '0.93')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.LIN-taxonomy.csv")
+
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "--lins",
+        "--ani-threshold",
+        "0.93",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank,query_ani_at_rank" in c.last_result.out
-    assert "test1,below_threshold,0,0.089,1,md5,test1.sig,0.057,444000,0.925" in c.last_result.out
-
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax, '--lins', '--ani-threshold', '0.924')
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank,query_ani_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,below_threshold,0,0.089,1,md5,test1.sig,0.057,444000,0.925"
+        in c.last_result.out
+    )
+
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "--lins",
+        "--ani-threshold",
+        "0.924",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank,query_ani_at_rank" in c.last_result.out
-    assert "test1,match,19,0.088,0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,md5,test1.sig,0.058,442000,0.925" in c.last_result.out
-
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax, '--lins', '--rank', '4')
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank,query_ani_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,19,0.088,0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,md5,test1.sig,0.058,442000,0.925"
+        in c.last_result.out
+    )
+
+    c.run_sourmash(
+        "tax", "genome", "-g", g_csv, "--taxonomy-csv", tax, "--lins", "--rank", "4"
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank,query_ani_at_rank" in c.last_result.out
-    assert "test1,below_threshold,4,0.088,0;0;0;0;0,md5,test1.sig,0.058,442000,0.925" in c.last_result.out
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank,query_ani_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,below_threshold,4,0.088,0;0;0;0;0,md5,test1.sig,0.058,442000,0.925"
+        in c.last_result.out
+    )


 def test_genome_LIN_lingroups(runtmp):
     # test basic genome with LIN taxonomy
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.LIN-taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.LIN-taxonomy.csv")

     lg_file = runtmp.output("test.lg.csv")
-    with open(lg_file, 'w') as out:
-        out.write('lin,name\n')
-        out.write('0;0;0,lg1\n')
-        out.write('1;0;0,lg2\n')
-        out.write('2;0;0,lg3\n')
-        out.write('1;0;1,lg3\n')
+    with open(lg_file, "w") as out:
+        out.write("lin,name\n")
+        out.write("0;0;0,lg1\n")
+        out.write("1;0;0,lg2\n")
+        out.write("2;0;0,lg3\n")
+        out.write("1;0;1,lg3\n")
         # write a 19 so we can check the end
-        out.write('0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,lg4\n')
-
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax, '--lins', '--lingroup', lg_file)
+        out.write("0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,lg4\n")
+
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "--lins",
+        "--lingroup",
+        lg_file,
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank,query_ani_at_rank" in c.last_result.out
-    assert "test1,below_threshold,2,0.088,0;0;0,md5,test1.sig,0.058,442000,0.925" in c.last_result.out
-
-    c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax, '--lins', '--lingroup', lg_file, '--ani-threshold', '0.924')
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank,query_ani_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,below_threshold,2,0.088,0;0;0,md5,test1.sig,0.058,442000,0.925"
+        in c.last_result.out
+    )
+
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "--lins",
+        "--lingroup",
+        lg_file,
+        "--ani-threshold",
+        "0.924",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)

     assert c.last_result.status == 0
-    assert "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank,query_ani_at_rank" in c.last_result.out
-    assert "test1,match,19,0.088,0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,md5,test1.sig,0.058,442000,0.925" in c.last_result.out
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank,query_ani_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,19,0.088,0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,md5,test1.sig,0.058,442000,0.925"
+        in c.last_result.out
+    )


 def test_annotate_0(runtmp):
     # test annotate basics
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
     csvout = runtmp.output("test1.gather.with-lineages.csv")
     out_dir = os.path.dirname(csvout)

-    c.run_sourmash('tax', 'annotate', '--gather-csv', g_csv, '--taxonomy-csv', tax, '-o', out_dir)
+    c.run_sourmash(
+        "tax", "annotate", "--gather-csv", g_csv, "--taxonomy-csv", tax, "-o", out_dir
+    )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2377,27 +4086,48 @@ def test_annotate_0(runtmp):

     assert f"saving 'annotate' output to '{csvout}'" in runtmp.last_result.err
     assert "lineage" in lin_gather_results[0]
-    assert "d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli" in lin_gather_results[1]
-    assert "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri" in lin_gather_results[2]
-    assert "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus" in lin_gather_results[3]
-    assert "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri" in lin_gather_results[4]
+    assert (
+        "d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli"
+        in lin_gather_results[1]
+    )
+    assert (
+        "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri"
+        in lin_gather_results[2]
+    )
+    assert (
+        "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus"
+        in lin_gather_results[3]
+    )
+    assert (
+        "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri"
+        in lin_gather_results[4]
+    )


 def test_annotate_gzipped_gather(runtmp):
     # test annotate basics
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")

     # rewrite gather_csv as gzipped csv
-    gz_gather = runtmp.output('test1.gather.csv.gz')
-    with open(g_csv, 'rb') as f_in, gzip.open(gz_gather, 'wb') as f_out:
+    gz_gather = runtmp.output("test1.gather.csv.gz")
+    with open(g_csv, "rb") as f_in, gzip.open(gz_gather, "wb") as f_out:
         f_out.writelines(f_in)

-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
     csvout = runtmp.output("test1.gather.with-lineages.csv")
     out_dir = os.path.dirname(csvout)

-    c.run_sourmash('tax', 'annotate', '--gather-csv', gz_gather, '--taxonomy-csv', tax, '-o', out_dir)
+    c.run_sourmash(
+        "tax",
+        "annotate",
+        "--gather-csv",
+        gz_gather,
+        "--taxonomy-csv",
+        tax,
+        "-o",
+        out_dir,
+    )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2411,22 +4141,44 @@ def test_annotate_gzipped_gather(runtmp):

     assert f"saving 'annotate' output to '{csvout}'" in runtmp.last_result.err
     assert "lineage" in lin_gather_results[0]
-    assert "d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli" in lin_gather_results[1]
-    assert "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri" in lin_gather_results[2]
-    assert "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus" in lin_gather_results[3]
-    assert "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri" in lin_gather_results[4]
+    assert (
+        "d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli"
+        in lin_gather_results[1]
+    )
+    assert (
+        "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri"
+        in lin_gather_results[2]
+    )
+    assert (
+        "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus"
+        in lin_gather_results[3]
+    )
+    assert (
+        "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri"
+        in lin_gather_results[4]
+    )


 def test_annotate_0_LIN(runtmp):
     # test annotate basics
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.LIN-taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.LIN-taxonomy.csv")
     csvout = runtmp.output("test1.gather.with-lineages.csv")
     out_dir = os.path.dirname(csvout)

-    c.run_sourmash('tax', 'annotate', '--gather-csv', g_csv, '--taxonomy-csv', tax, '-o', out_dir, "--lins")
+    c.run_sourmash(
+        "tax",
+        "annotate",
+        "--gather-csv",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "-o",
+        out_dir,
+        "--lins",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2451,19 +4203,29 @@ def test_annotate_gather_argparse(runtmp):
     # this tests argparse handling w/extend.
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.csv")
     csvout = runtmp.output("test1.gather.with-lineages.csv")
     out_dir = os.path.dirname(csvout)

-    g_empty_csv = runtmp.output('g_empty.csv')
+    g_empty_csv = runtmp.output("g_empty.csv")
     with open(g_empty_csv, "w") as fp:
         fp.write("")
     print("g_csv: ", g_empty_csv)

-    c.run_sourmash('tax', 'annotate', '--gather-csv', g_csv,
-                   '-g', g_empty_csv, '--taxonomy-csv', tax, '-o', out_dir,
-                   '--force')
+    c.run_sourmash(
+        "tax",
+        "annotate",
+        "--gather-csv",
+        g_csv,
+        "-g",
+        g_empty_csv,
+        "--taxonomy-csv",
+        tax,
+        "-o",
+        out_dir,
+        "--force",
+    )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2477,19 +4239,24 @@ def test_annotate_gather_argparse(runtmp):

     assert f"saving 'annotate' output to '{csvout}'" in runtmp.last_result.err
     assert "lineage" in lin_gather_results[0]
-    assert "d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli" in lin_gather_results[1]
+    assert (
+        "d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli"
+        in lin_gather_results[1]
+    )


 def test_annotate_0_db(runtmp):
     # test annotate with sqlite db
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-    tax = utils.get_test_data('tax/test.taxonomy.db')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+    tax = utils.get_test_data("tax/test.taxonomy.db")
     csvout = runtmp.output("test1.gather.with-lineages.csv")
     out_dir = os.path.dirname(csvout)

-    c.run_sourmash('tax', 'annotate', '--gather-csv', g_csv, '--taxonomy-csv', tax, '-o', out_dir)
+    c.run_sourmash(
+        "tax", "annotate", "--gather-csv", g_csv, "--taxonomy-csv", tax, "-o", out_dir
+    )

     print(c.last_result.status)
     print(c.last_result.out)
@@ -2502,105 +4269,134 @@ def test_annotate_0_db(runtmp):

     assert f"saving 'annotate' output to '{csvout}'" in runtmp.last_result.err
     assert "lineage" in lin_gather_results[0]
-    assert "d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli" in lin_gather_results[1]
-    assert "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri" in lin_gather_results[2]
-    assert "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus" in lin_gather_results[3]
-    assert "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri" in lin_gather_results[4]
+    assert (
+        "d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli"
+        in lin_gather_results[1]
+    )
+    assert (
+        "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri"
+        in lin_gather_results[2]
+    )
+    assert (
+        "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola vulgatus"
+        in lin_gather_results[3]
+    )
+ assert ( + "d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri" + in lin_gather_results[4] + ) def test_annotate_empty_gather_results(runtmp): - tax = utils.get_test_data('tax/test.taxonomy.csv') + tax = utils.get_test_data("tax/test.taxonomy.csv") - #creates empty gather result - g_csv = runtmp.output('g.csv') + # creates empty gather result + g_csv = runtmp.output("g.csv") with open(g_csv, "w") as fp: fp.write("") print("g_csv: ", g_csv) with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('tax', 'annotate', '-g', g_csv, '--taxonomy-csv', tax) + runtmp.run_sourmash("tax", "annotate", "-g", g_csv, "--taxonomy-csv", tax) assert f"Cannot read from '{g_csv}'. Is file empty?" in str(exc.value) assert runtmp.last_result.status == -1 def test_annotate_prefetch_or_other_header(runtmp): - tax = utils.get_test_data('tax/test.taxonomy.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') - - alt_csv = runtmp.output('g.csv') - for alt_col in ['match_name', 'ident', 'accession']: - #modify 'name' to other acceptable id_columns result - alt_g = [x.replace("name", alt_col) + "\n" for x in Path(g_csv).read_text().splitlines()] - with open(alt_csv, 'w') as fp: + tax = utils.get_test_data("tax/test.taxonomy.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") + + alt_csv = runtmp.output("g.csv") + for alt_col in ["match_name", "ident", "accession"]: + # modify 'name' to other acceptable id_columns result + alt_g = [ + x.replace("name", alt_col) + "\n" + for x in Path(g_csv).read_text().splitlines() + ] + with open(alt_csv, "w") as fp: fp.writelines(alt_g) - runtmp.run_sourmash('tax', 'annotate', '-g', alt_csv, '--taxonomy-csv', tax) + runtmp.run_sourmash("tax", "annotate", "-g", alt_csv, "--taxonomy-csv", tax) assert runtmp.last_result.status == 0 print(runtmp.last_result.out) print(runtmp.last_result.err) - assert f"Starting annotation on '{alt_csv}'. Using ID column: '{alt_col}'" in runtmp.last_result.err + assert ( + f"Starting annotation on '{alt_csv}'. Using ID column: '{alt_col}'" + in runtmp.last_result.err + ) assert f"Annotated 4 of 4 total rows from '{alt_csv}'" in runtmp.last_result.err def test_annotate_bad_header(runtmp): - tax = utils.get_test_data('tax/test.taxonomy.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax = utils.get_test_data("tax/test.taxonomy.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") - bad_g_csv = runtmp.output('g.csv') + bad_g_csv = runtmp.output("g.csv") - #creates bad gather result - bad_g = [x.replace("name", "nope") + "\n" for x in Path(g_csv).read_text().splitlines()] - with open(bad_g_csv, 'w') as fp: + # creates bad gather result + bad_g = [ + x.replace("name", "nope") + "\n" for x in Path(g_csv).read_text().splitlines() + ] + with open(bad_g_csv, "w") as fp: fp.writelines(bad_g) # print("bad_gather_results: \n", bad_g) with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('tax', 'annotate', '-g', bad_g_csv, '--taxonomy-csv', tax) + runtmp.run_sourmash("tax", "annotate", "-g", bad_g_csv, "--taxonomy-csv", tax) - assert f"ERROR: Cannot find taxonomic identifier column in '{bad_g_csv}'. Tried: name, match_name, ident, accession" in str(exc.value) + assert ( + f"ERROR: Cannot find taxonomic identifier column in '{bad_g_csv}'. 
Tried: name, match_name, ident, accession" + in str(exc.value) + ) assert runtmp.last_result.status == -1 print(runtmp.last_result.out) print(runtmp.last_result.err) def test_annotate_no_tax_matches(runtmp): - tax = utils.get_test_data('tax/test.taxonomy.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax = utils.get_test_data("tax/test.taxonomy.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") - bad_g_csv = runtmp.output('g.csv') + bad_g_csv = runtmp.output("g.csv") - #mess up tax idents - bad_g = [x.replace("GCF_", "GGG_") + "\n" for x in Path(g_csv).read_text().splitlines()] - with open(bad_g_csv, 'w') as fp: + # mess up tax idents + bad_g = [ + x.replace("GCF_", "GGG_") + "\n" for x in Path(g_csv).read_text().splitlines() + ] + with open(bad_g_csv, "w") as fp: fp.writelines(bad_g) # print("bad_gather_results: \n", bad_g) with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('tax', 'annotate', '-g', bad_g_csv, '--taxonomy-csv', tax) + runtmp.run_sourmash("tax", "annotate", "-g", bad_g_csv, "--taxonomy-csv", tax) assert f"ERROR: Could not annotate any rows from '{bad_g_csv}'" in str(exc.value) assert runtmp.last_result.status == -1 print(runtmp.last_result.out) print(runtmp.last_result.err) - runtmp.run_sourmash('tax', 'annotate', '-g', bad_g_csv, '--taxonomy-csv', tax, '--force') + runtmp.run_sourmash( + "tax", "annotate", "-g", bad_g_csv, "--taxonomy-csv", tax, "--force" + ) assert runtmp.last_result.status == 0 assert f"Could not annotate any rows from '{bad_g_csv}'" in runtmp.last_result.err - assert f"--force is set. Attempting to continue to next file." in runtmp.last_result.err + assert ( + "--force is set. Attempting to continue to next file." in runtmp.last_result.err + ) print(runtmp.last_result.out) print(runtmp.last_result.err) def test_annotate_missed_tax_matches(runtmp): - tax = utils.get_test_data('tax/test.taxonomy.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax = utils.get_test_data("tax/test.taxonomy.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") - bad_g_csv = runtmp.output('g.csv') + bad_g_csv = runtmp.output("g.csv") - with open(g_csv, 'r') as gather_lines, open(bad_g_csv, 'w') as fp: + with open(g_csv) as gather_lines, open(bad_g_csv, "w") as fp: for n, line in enumerate(gather_lines): if n > 2: # mess up tax idents of lines 3, 4 @@ -2608,7 +4404,7 @@ def test_annotate_missed_tax_matches(runtmp): fp.write(line) # print("bad_gather_results: \n", bad_g) - runtmp.run_sourmash('tax', 'annotate', '-g', bad_g_csv, '--taxonomy-csv', tax) + runtmp.run_sourmash("tax", "annotate", "-g", bad_g_csv, "--taxonomy-csv", tax) print(runtmp.last_result.out) print(runtmp.last_result.err) @@ -2618,16 +4414,15 @@ def test_annotate_missed_tax_matches(runtmp): def test_annotate_empty_tax_lineage_input(runtmp): - tax_empty = runtmp.output('t.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax_empty = runtmp.output("t.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") with open(tax_empty, "w") as fp: fp.write("") print("t_csv: ", tax_empty) - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('tax', 'annotate', '-g', g_csv, '--taxonomy-csv', tax_empty) + runtmp.run_sourmash("tax", "annotate", "-g", g_csv, "--taxonomy-csv", tax_empty) print(runtmp.last_result.status) print(runtmp.last_result.out) @@ -2638,15 +4433,25 @@ def test_annotate_empty_tax_lineage_input(runtmp): def test_annotate_empty_tax_lineage_input_recover_with_second_taxfile(runtmp): - tax_empty = 
runtmp.output('t.csv') - tax = utils.get_test_data('tax/test.taxonomy.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax_empty = runtmp.output("t.csv") + tax = utils.get_test_data("tax/test.taxonomy.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") with open(tax_empty, "w") as fp: fp.write("") print("t_csv: ", tax_empty) - runtmp.run_sourmash('tax', 'annotate', '-g', g_csv, '-t', tax_empty, '--taxonomy-csv', tax, '--force') + runtmp.run_sourmash( + "tax", + "annotate", + "-g", + g_csv, + "-t", + tax_empty, + "--taxonomy-csv", + tax, + "--force", + ) print(runtmp.last_result.status) print(runtmp.last_result.out) @@ -2657,16 +4462,25 @@ def test_annotate_empty_tax_lineage_input_recover_with_second_taxfile(runtmp): def test_annotate_empty_tax_lineage_input_recover_with_second_taxfile_2(runtmp): # test with empty tax second, to check on argparse handling - tax_empty = runtmp.output('t.csv') - tax = utils.get_test_data('tax/test.taxonomy.csv') - g_csv = utils.get_test_data('tax/test1.gather.csv') + tax_empty = runtmp.output("t.csv") + tax = utils.get_test_data("tax/test.taxonomy.csv") + g_csv = utils.get_test_data("tax/test1.gather.csv") with open(tax_empty, "w") as fp: fp.write("") print("t_csv: ", tax_empty) - runtmp.run_sourmash('tax', 'annotate', '-g', g_csv, - '--taxonomy-csv', tax, '-t', tax_empty, '--force') + runtmp.run_sourmash( + "tax", + "annotate", + "-g", + g_csv, + "--taxonomy-csv", + tax, + "-t", + tax_empty, + "--force", + ) print(runtmp.last_result.status) print(runtmp.last_result.out) @@ -2677,29 +4491,31 @@ def test_annotate_empty_tax_lineage_input_recover_with_second_taxfile_2(runtmp): def test_tax_prepare_1_csv_to_csv(runtmp, keep_identifiers, keep_versions): # CSV -> CSV; same assignments - tax = utils.get_test_data('tax/test.taxonomy.csv') - taxout = runtmp.output('out.csv') + tax = utils.get_test_data("tax/test.taxonomy.csv") + taxout = runtmp.output("out.csv") args = [] if keep_identifiers: - args.append('--keep-full-identifiers') + args.append("--keep-full-identifiers") if keep_versions: - args.append('--keep-identifier-versions') + args.append("--keep-identifier-versions") # this is an error - can't strip versions if not splitting identifiers if keep_identifiers and not keep_versions: with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', - taxout, '-F', 'csv', *args) + runtmp.run_sourmash( + "tax", "prepare", "-t", tax, "-o", taxout, "-F", "csv", *args + ) return - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', - taxout, '-F', 'csv', *args) + runtmp.run_sourmash("tax", "prepare", "-t", tax, "-o", taxout, "-F", "csv", *args) assert os.path.exists(taxout) - db1 = tax_utils.MultiLineageDB.load([tax], - keep_full_identifiers=keep_identifiers, - keep_identifier_versions=keep_versions) + db1 = tax_utils.MultiLineageDB.load( + [tax], + keep_full_identifiers=keep_identifiers, + keep_identifier_versions=keep_versions, + ) db2 = tax_utils.MultiLineageDB.load([taxout]) @@ -2708,13 +4524,12 @@ def test_tax_prepare_1_csv_to_csv(runtmp, keep_identifiers, keep_versions): def test_tax_prepare_1_combine_csv(runtmp): # multiple CSVs to a single combined CSV - tax1 = utils.get_test_data('tax/test.taxonomy.csv') - tax2 = utils.get_test_data('tax/protozoa_genbank_lineage.csv') + tax1 = utils.get_test_data("tax/test.taxonomy.csv") + tax2 = utils.get_test_data("tax/protozoa_genbank_lineage.csv") - taxout = runtmp.output('out.csv') + taxout = runtmp.output("out.csv") - runtmp.sourmash('tax', 'prepare', '-t', tax1, 
tax2, '-F', 'csv', - '-o', taxout) + runtmp.sourmash("tax", "prepare", "-t", tax1, tax2, "-F", "csv", "-o", taxout) out = runtmp.last_result.out err = runtmp.last_result.err @@ -2728,29 +4543,31 @@ def test_tax_prepare_1_combine_csv(runtmp): def test_tax_prepare_1_csv_to_csv_empty_ranks(runtmp, keep_identifiers, keep_versions): # CSV -> CSV; same assignments, even when trailing ranks are empty - tax = utils.get_test_data('tax/test-empty-ranks.taxonomy.csv') - taxout = runtmp.output('out.csv') + tax = utils.get_test_data("tax/test-empty-ranks.taxonomy.csv") + taxout = runtmp.output("out.csv") args = [] if keep_identifiers: - args.append('--keep-full-identifiers') + args.append("--keep-full-identifiers") if keep_versions: - args.append('--keep-identifier-versions') + args.append("--keep-identifier-versions") # this is an error - can't strip versions if not splitting identifiers if keep_identifiers and not keep_versions: with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', - taxout, '-F', 'csv', *args) + runtmp.run_sourmash( + "tax", "prepare", "-t", tax, "-o", taxout, "-F", "csv", *args + ) return - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', - taxout, '-F', 'csv', *args) + runtmp.run_sourmash("tax", "prepare", "-t", tax, "-o", taxout, "-F", "csv", *args) assert os.path.exists(taxout) - db1 = tax_utils.MultiLineageDB.load([tax], - keep_full_identifiers=keep_identifiers, - keep_identifier_versions=keep_versions) + db1 = tax_utils.MultiLineageDB.load( + [tax], + keep_full_identifiers=keep_identifiers, + keep_identifier_versions=keep_versions, + ) db2 = tax_utils.MultiLineageDB.load([taxout]) @@ -2760,9 +4577,9 @@ def test_tax_prepare_1_csv_to_csv_empty_ranks(runtmp, keep_identifiers, keep_ver def test_tax_prepare_1_csv_to_csv_empty_file(runtmp, keep_identifiers, keep_versions): # CSV -> CSV with an empty input file and --force # tests argparse extend - tax = utils.get_test_data('tax/test-empty-ranks.taxonomy.csv') - tax_empty = runtmp.output('t.csv') - taxout = runtmp.output('out.csv') + tax = utils.get_test_data("tax/test-empty-ranks.taxonomy.csv") + tax_empty = runtmp.output("t.csv") + taxout = runtmp.output("out.csv") with open(tax_empty, "w") as fp: fp.write("") @@ -2770,86 +4587,109 @@ def test_tax_prepare_1_csv_to_csv_empty_file(runtmp, keep_identifiers, keep_vers args = [] if keep_identifiers: - args.append('--keep-full-identifiers') + args.append("--keep-full-identifiers") if keep_versions: - args.append('--keep-identifier-versions') + args.append("--keep-identifier-versions") # this is an error - can't strip versions if not splitting identifiers if keep_identifiers and not keep_versions: with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', - taxout, '-F', 'csv', *args) + runtmp.run_sourmash( + "tax", "prepare", "-t", tax, "-o", taxout, "-F", "csv", *args + ) return - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-t', tax_empty, '-o', - taxout, '-F', 'csv', *args, '--force') + runtmp.run_sourmash( + "tax", + "prepare", + "-t", + tax, + "-t", + tax_empty, + "-o", + taxout, + "-F", + "csv", + *args, + "--force", + ) assert os.path.exists(taxout) - db1 = tax_utils.MultiLineageDB.load([tax], - keep_full_identifiers=keep_identifiers, - keep_identifier_versions=keep_versions) + db1 = tax_utils.MultiLineageDB.load( + [tax], + keep_full_identifiers=keep_identifiers, + keep_identifier_versions=keep_versions, + ) db2 = tax_utils.MultiLineageDB.load([taxout]) assert set(db1) == set(db2) 
-def test_tax_prepare_1_csv_to_csv_empty_ranks_2(runtmp, keep_identifiers, keep_versions): +def test_tax_prepare_1_csv_to_csv_empty_ranks_2( + runtmp, keep_identifiers, keep_versions +): # CSV -> CSV; same assignments for situations with empty internal ranks - tax = utils.get_test_data('tax/test-empty-ranks-2.taxonomy.csv') - taxout = runtmp.output('out.csv') + tax = utils.get_test_data("tax/test-empty-ranks-2.taxonomy.csv") + taxout = runtmp.output("out.csv") args = [] if keep_identifiers: - args.append('--keep-full-identifiers') + args.append("--keep-full-identifiers") if keep_versions: - args.append('--keep-identifier-versions') + args.append("--keep-identifier-versions") # this is an error - can't strip versions if not splitting identifiers if keep_identifiers and not keep_versions: with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', - taxout, '-F', 'csv', *args) + runtmp.run_sourmash( + "tax", "prepare", "-t", tax, "-o", taxout, "-F", "csv", *args + ) return - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', - taxout, '-F', 'csv', *args) + runtmp.run_sourmash("tax", "prepare", "-t", tax, "-o", taxout, "-F", "csv", *args) assert os.path.exists(taxout) - db1 = tax_utils.MultiLineageDB.load([tax], - keep_full_identifiers=keep_identifiers, - keep_identifier_versions=keep_versions) + db1 = tax_utils.MultiLineageDB.load( + [tax], + keep_full_identifiers=keep_identifiers, + keep_identifier_versions=keep_versions, + ) db2 = tax_utils.MultiLineageDB.load([taxout]) assert set(db1) == set(db2) -def test_tax_prepare_1_csv_to_csv_empty_ranks_3(runtmp, keep_identifiers, keep_versions): +def test_tax_prepare_1_csv_to_csv_empty_ranks_3( + runtmp, keep_identifiers, keep_versions +): # CSV -> CSV; same assignments for situations with empty internal ranks - tax = utils.get_test_data('tax/test-empty-ranks-3.taxonomy.csv') - taxout = runtmp.output('out.csv') + tax = utils.get_test_data("tax/test-empty-ranks-3.taxonomy.csv") + taxout = runtmp.output("out.csv") args = [] if keep_identifiers: - args.append('--keep-full-identifiers') + args.append("--keep-full-identifiers") if keep_versions: - args.append('--keep-identifier-versions') + args.append("--keep-identifier-versions") # this is an error - can't strip versions if not splitting identifiers if keep_identifiers and not keep_versions: with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', - taxout, '-F', 'csv', *args) + runtmp.run_sourmash( + "tax", "prepare", "-t", tax, "-o", taxout, "-F", "csv", *args + ) return - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', - taxout, '-F', 'csv', *args) + runtmp.run_sourmash("tax", "prepare", "-t", tax, "-o", taxout, "-F", "csv", *args) assert os.path.exists(taxout) - db1 = tax_utils.MultiLineageDB.load([tax], - keep_full_identifiers=keep_identifiers, - keep_identifier_versions=keep_versions) + db1 = tax_utils.MultiLineageDB.load( + [tax], + keep_full_identifiers=keep_identifiers, + keep_identifier_versions=keep_versions, + ) db2 = tax_utils.MultiLineageDB.load([taxout]) @@ -2858,65 +4698,70 @@ def test_tax_prepare_1_csv_to_csv_empty_ranks_3(runtmp, keep_identifiers, keep_v def test_tax_prepare_2_csv_to_sql(runtmp, keep_identifiers, keep_versions): # CSV -> SQL; same assignments? 
- tax = utils.get_test_data('tax/test.taxonomy.csv') - taxout = runtmp.output('out.db') + tax = utils.get_test_data("tax/test.taxonomy.csv") + taxout = runtmp.output("out.db") args = [] if keep_identifiers: - args.append('--keep-full-identifiers') + args.append("--keep-full-identifiers") if keep_versions: - args.append('--keep-identifier-versions') + args.append("--keep-identifier-versions") # this is an error - can't strip versions if not splitting identifiers if keep_identifiers and not keep_versions: with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', taxout, - '-F', 'sql', *args) + runtmp.run_sourmash( + "tax", "prepare", "-t", tax, "-o", taxout, "-F", "sql", *args + ) return - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', taxout, - '-F', 'sql', *args) + runtmp.run_sourmash("tax", "prepare", "-t", tax, "-o", taxout, "-F", "sql", *args) assert os.path.exists(taxout) - db1 = tax_utils.MultiLineageDB.load([tax], - keep_full_identifiers=keep_identifiers, - keep_identifier_versions=keep_versions) + db1 = tax_utils.MultiLineageDB.load( + [tax], + keep_full_identifiers=keep_identifiers, + keep_identifier_versions=keep_versions, + ) db2 = tax_utils.MultiLineageDB.load([taxout]) assert set(db1) == set(db2) # cannot overwrite - with pytest.raises(SourmashCommandFailed) as exc: - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', taxout, - '-F', 'sql', *args) - assert 'taxonomy table already exists' in str(exc.value) + runtmp.run_sourmash( + "tax", "prepare", "-t", tax, "-o", taxout, "-F", "sql", *args + ) + assert "taxonomy table already exists" in str(exc.value) def test_tax_prepare_2_csv_to_sql_empty_ranks(runtmp, keep_identifiers, keep_versions): # CSV -> SQL with some empty ranks in the taxonomy file - tax = utils.get_test_data('tax/test-empty-ranks.taxonomy.csv') - taxout = runtmp.output('out.db') + tax = utils.get_test_data("tax/test-empty-ranks.taxonomy.csv") + taxout = runtmp.output("out.db") args = [] if keep_identifiers: - args.append('--keep-full-identifiers') + args.append("--keep-full-identifiers") if keep_versions: - args.append('--keep-identifier-versions') + args.append("--keep-identifier-versions") # this is an error - can't strip versions if not splitting identifiers if keep_identifiers and not keep_versions: with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', taxout, - '-F', 'sql', *args) + runtmp.run_sourmash( + "tax", "prepare", "-t", tax, "-o", taxout, "-F", "sql", *args + ) return - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', taxout, - '-F', 'sql', *args) + runtmp.run_sourmash("tax", "prepare", "-t", tax, "-o", taxout, "-F", "sql", *args) assert os.path.exists(taxout) - db1 = tax_utils.MultiLineageDB.load([tax], - keep_full_identifiers=keep_identifiers, - keep_identifier_versions=keep_versions) + db1 = tax_utils.MultiLineageDB.load( + [tax], + keep_full_identifiers=keep_identifiers, + keep_identifier_versions=keep_versions, + ) db2 = tax_utils.MultiLineageDB.load([taxout]) assert set(db1) == set(db2) @@ -2924,107 +4769,113 @@ def test_tax_prepare_2_csv_to_sql_empty_ranks(runtmp, keep_identifiers, keep_ver def test_tax_prepare_3_db_to_csv(runtmp): # SQL -> CSV; same assignments - taxcsv = utils.get_test_data('tax/test.taxonomy.csv') - taxdb = utils.get_test_data('tax/test.taxonomy.db') - taxout = runtmp.output('out.csv') + taxcsv = utils.get_test_data("tax/test.taxonomy.csv") + taxdb = utils.get_test_data("tax/test.taxonomy.db") + taxout = 
runtmp.output("out.csv") - runtmp.run_sourmash('tax', 'prepare', '-t', taxdb, - '-o', taxout, '-F', 'csv') + runtmp.run_sourmash("tax", "prepare", "-t", taxdb, "-o", taxout, "-F", "csv") assert os.path.exists(taxout) with open(taxout) as fp: print(fp.read()) - db1 = tax_utils.MultiLineageDB.load([taxcsv], - keep_full_identifiers=False, - keep_identifier_versions=False) + db1 = tax_utils.MultiLineageDB.load( + [taxcsv], keep_full_identifiers=False, keep_identifier_versions=False + ) db2 = tax_utils.MultiLineageDB.load([taxout]) - db3 = tax_utils.MultiLineageDB.load([taxdb], - keep_full_identifiers=False, - keep_identifier_versions=False) + db3 = tax_utils.MultiLineageDB.load( + [taxdb], keep_full_identifiers=False, keep_identifier_versions=False + ) assert set(db1) == set(db2) assert set(db1) == set(db3) def test_tax_prepare_3_db_to_csv_gz(runtmp): # SQL -> CSV; same assignments - taxcsv = utils.get_test_data('tax/test.taxonomy.csv') - taxdb = utils.get_test_data('tax/test.taxonomy.db') - taxout = runtmp.output('out.csv.gz') + taxcsv = utils.get_test_data("tax/test.taxonomy.csv") + taxdb = utils.get_test_data("tax/test.taxonomy.db") + taxout = runtmp.output("out.csv.gz") - runtmp.run_sourmash('tax', 'prepare', '-t', taxdb, - '-o', taxout, '-F', 'csv') + runtmp.run_sourmash("tax", "prepare", "-t", taxdb, "-o", taxout, "-F", "csv") assert os.path.exists(taxout) - with gzip.open(taxout, 'rt') as fp: + with gzip.open(taxout, "rt") as fp: print(fp.read()) - db1 = tax_utils.MultiLineageDB.load([taxcsv], - keep_full_identifiers=False, - keep_identifier_versions=False) + db1 = tax_utils.MultiLineageDB.load( + [taxcsv], keep_full_identifiers=False, keep_identifier_versions=False + ) db2 = tax_utils.MultiLineageDB.load([taxout]) - db3 = tax_utils.MultiLineageDB.load([taxdb], - keep_full_identifiers=False, - keep_identifier_versions=False) + db3 = tax_utils.MultiLineageDB.load( + [taxdb], keep_full_identifiers=False, keep_identifier_versions=False + ) assert set(db1) == set(db2) assert set(db1) == set(db3) -def test_tax_prepare_2_csv_to_sql_empty_ranks_2(runtmp, keep_identifiers, keep_versions): +def test_tax_prepare_2_csv_to_sql_empty_ranks_2( + runtmp, keep_identifiers, keep_versions +): # CSV -> SQL with some empty internal ranks in the taxonomy file - tax = utils.get_test_data('tax/test-empty-ranks-2.taxonomy.csv') - taxout = runtmp.output('out.db') + tax = utils.get_test_data("tax/test-empty-ranks-2.taxonomy.csv") + taxout = runtmp.output("out.db") args = [] if keep_identifiers: - args.append('--keep-full-identifiers') + args.append("--keep-full-identifiers") if keep_versions: - args.append('--keep-identifier-versions') + args.append("--keep-identifier-versions") # this is an error - can't strip versions if not splitting identifiers if keep_identifiers and not keep_versions: with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', taxout, - '-F', 'sql', *args) + runtmp.run_sourmash( + "tax", "prepare", "-t", tax, "-o", taxout, "-F", "sql", *args + ) return - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', taxout, - '-F', 'sql', *args) + runtmp.run_sourmash("tax", "prepare", "-t", tax, "-o", taxout, "-F", "sql", *args) assert os.path.exists(taxout) - db1 = tax_utils.MultiLineageDB.load([tax], - keep_full_identifiers=keep_identifiers, - keep_identifier_versions=keep_versions) + db1 = tax_utils.MultiLineageDB.load( + [tax], + keep_full_identifiers=keep_identifiers, + keep_identifier_versions=keep_versions, + ) db2 = 
tax_utils.MultiLineageDB.load([taxout]) assert set(db1) == set(db2) -def test_tax_prepare_2_csv_to_sql_empty_ranks_3(runtmp, keep_identifiers, keep_versions): +def test_tax_prepare_2_csv_to_sql_empty_ranks_3( + runtmp, keep_identifiers, keep_versions +): # CSV -> SQL with some empty internal ranks in the taxonomy file - tax = utils.get_test_data('tax/test-empty-ranks-3.taxonomy.csv') - taxout = runtmp.output('out.db') + tax = utils.get_test_data("tax/test-empty-ranks-3.taxonomy.csv") + taxout = runtmp.output("out.db") args = [] if keep_identifiers: - args.append('--keep-full-identifiers') + args.append("--keep-full-identifiers") if keep_versions: - args.append('--keep-identifier-versions') + args.append("--keep-identifier-versions") # this is an error - can't strip versions if not splitting identifiers if keep_identifiers and not keep_versions: with pytest.raises(SourmashCommandFailed): - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', taxout, - '-F', 'sql', *args) + runtmp.run_sourmash( + "tax", "prepare", "-t", tax, "-o", taxout, "-F", "sql", *args + ) return - runtmp.run_sourmash('tax', 'prepare', '-t', tax, '-o', taxout, - '-F', 'sql', *args) + runtmp.run_sourmash("tax", "prepare", "-t", tax, "-o", taxout, "-F", "sql", *args) assert os.path.exists(taxout) - db1 = tax_utils.MultiLineageDB.load([tax], - keep_full_identifiers=keep_identifiers, - keep_identifier_versions=keep_versions) + db1 = tax_utils.MultiLineageDB.load( + [tax], + keep_full_identifiers=keep_identifiers, + keep_identifier_versions=keep_versions, + ) db2 = tax_utils.MultiLineageDB.load([taxout]) assert set(db1) == set(db2) @@ -3032,83 +4883,79 @@ def test_tax_prepare_2_csv_to_sql_empty_ranks_3(runtmp, keep_identifiers, keep_v def test_tax_prepare_3_db_to_csv_empty_ranks(runtmp): # SQL -> CSV; same assignments, with empty ranks - taxcsv = utils.get_test_data('tax/test-empty-ranks.taxonomy.csv') - taxdb = utils.get_test_data('tax/test-empty-ranks.taxonomy.db') - taxout = runtmp.output('out.csv') + taxcsv = utils.get_test_data("tax/test-empty-ranks.taxonomy.csv") + taxdb = utils.get_test_data("tax/test-empty-ranks.taxonomy.db") + taxout = runtmp.output("out.csv") - runtmp.run_sourmash('tax', 'prepare', '-t', taxdb, - '-o', taxout, '-F', 'csv') + runtmp.run_sourmash("tax", "prepare", "-t", taxdb, "-o", taxout, "-F", "csv") assert os.path.exists(taxout) with open(taxout) as fp: print(fp.read()) - db1 = tax_utils.MultiLineageDB.load([taxcsv], - keep_full_identifiers=False, - keep_identifier_versions=False) + db1 = tax_utils.MultiLineageDB.load( + [taxcsv], keep_full_identifiers=False, keep_identifier_versions=False + ) db2 = tax_utils.MultiLineageDB.load([taxout]) - db3 = tax_utils.MultiLineageDB.load([taxdb], - keep_full_identifiers=False, - keep_identifier_versions=False) + db3 = tax_utils.MultiLineageDB.load( + [taxdb], keep_full_identifiers=False, keep_identifier_versions=False + ) assert set(db1) == set(db2) assert set(db1) == set(db3) def test_tax_prepare_3_db_to_csv_empty_ranks_2(runtmp): # SQL -> CSV; same assignments, with empty ranks - taxcsv = utils.get_test_data('tax/test-empty-ranks-2.taxonomy.csv') - taxdb = utils.get_test_data('tax/test-empty-ranks-2.taxonomy.db') - taxout = runtmp.output('out.csv') + taxcsv = utils.get_test_data("tax/test-empty-ranks-2.taxonomy.csv") + taxdb = utils.get_test_data("tax/test-empty-ranks-2.taxonomy.db") + taxout = runtmp.output("out.csv") - runtmp.run_sourmash('tax', 'prepare', '-t', taxdb, - '-o', taxout, '-F', 'csv') + runtmp.run_sourmash("tax", "prepare", "-t", taxdb, 
"-o", taxout, "-F", "csv") assert os.path.exists(taxout) with open(taxout) as fp: print(fp.read()) - db1 = tax_utils.MultiLineageDB.load([taxcsv], - keep_full_identifiers=False, - keep_identifier_versions=False) + db1 = tax_utils.MultiLineageDB.load( + [taxcsv], keep_full_identifiers=False, keep_identifier_versions=False + ) db2 = tax_utils.MultiLineageDB.load([taxout]) - db3 = tax_utils.MultiLineageDB.load([taxdb], - keep_full_identifiers=False, - keep_identifier_versions=False) + db3 = tax_utils.MultiLineageDB.load( + [taxdb], keep_full_identifiers=False, keep_identifier_versions=False + ) assert set(db1) == set(db2) assert set(db1) == set(db3) def test_tax_prepare_3_db_to_csv_empty_ranks_3(runtmp): # SQL -> CSV; same assignments, with empty ranks - taxcsv = utils.get_test_data('tax/test-empty-ranks-3.taxonomy.csv') - taxdb = utils.get_test_data('tax/test-empty-ranks-3.taxonomy.db') - taxout = runtmp.output('out.csv') + taxcsv = utils.get_test_data("tax/test-empty-ranks-3.taxonomy.csv") + taxdb = utils.get_test_data("tax/test-empty-ranks-3.taxonomy.db") + taxout = runtmp.output("out.csv") - runtmp.run_sourmash('tax', 'prepare', '-t', taxdb, - '-o', taxout, '-F', 'csv') + runtmp.run_sourmash("tax", "prepare", "-t", taxdb, "-o", taxout, "-F", "csv") assert os.path.exists(taxout) with open(taxout) as fp: print(fp.read()) - db1 = tax_utils.MultiLineageDB.load([taxcsv], - keep_full_identifiers=False, - keep_identifier_versions=False) + db1 = tax_utils.MultiLineageDB.load( + [taxcsv], keep_full_identifiers=False, keep_identifier_versions=False + ) db2 = tax_utils.MultiLineageDB.load([taxout]) - db3 = tax_utils.MultiLineageDB.load([taxdb], - keep_full_identifiers=False, - keep_identifier_versions=False) + db3 = tax_utils.MultiLineageDB.load( + [taxdb], keep_full_identifiers=False, keep_identifier_versions=False + ) assert set(db1) == set(db2) assert set(db1) == set(db3) def test_tax_prepare_sqlite_lineage_version(runtmp): # test bad sourmash_internals version for SqliteLineage - taxcsv = utils.get_test_data('tax/test.taxonomy.csv') - taxout = runtmp.output('out.db') + taxcsv = utils.get_test_data("tax/test.taxonomy.csv") + taxout = runtmp.output("out.db") - runtmp.run_sourmash('tax', 'prepare', '-t', taxcsv, - '-o', taxout, '-F', 'sql') + runtmp.run_sourmash("tax", "prepare", "-t", taxcsv, "-o", taxout, "-F", "sql") assert os.path.exists(taxout) # set bad version @@ -3120,206 +4967,208 @@ def test_tax_prepare_sqlite_lineage_version(runtmp): conn.close() with pytest.raises(IndexNotSupported): - db = tax_utils.MultiLineageDB.load([taxout]) + tax_utils.MultiLineageDB.load([taxout]) def test_tax_prepare_sqlite_no_lineage(): # no lineage table at all - sqldb = utils.get_test_data('sqlite/index.sqldb') + sqldb = utils.get_test_data("sqlite/index.sqldb") with pytest.raises(ValueError): - db = tax_utils.MultiLineageDB.load([sqldb]) + tax_utils.MultiLineageDB.load([sqldb]) def test_tax_grep_exists(runtmp): # test that 'tax grep' exists with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('tax', 'grep') + runtmp.sourmash("tax", "grep") err = runtmp.last_result.err - assert 'usage:' in err + assert "usage:" in err def test_tax_grep_search_shew(runtmp): # test 'tax grep Shew' - taxfile = utils.get_test_data('tax/test.taxonomy.csv') + taxfile = utils.get_test_data("tax/test.taxonomy.csv") - runtmp.sourmash('tax', 'grep', 'Shew', '-t', taxfile) + runtmp.sourmash("tax", "grep", "Shew", "-t", taxfile) out = runtmp.last_result.out err = runtmp.last_result.err - lines = [ x.strip() for x in 
out.splitlines() ] - lines = [ x.split(',') for x in lines ] - assert lines[0][0] == 'ident' - assert lines[1][0] == 'GCF_000017325.1' - assert lines[2][0] == 'GCF_000021665.1' + lines = [x.strip() for x in out.splitlines()] + lines = [x.split(",") for x in lines] + assert lines[0][0] == "ident" + assert lines[1][0] == "GCF_000017325.1" + assert lines[2][0] == "GCF_000021665.1" assert len(lines) == 3 assert "searching 1 taxonomy files for 'Shew'" in err - assert 'found 2 matches; saved identifiers to picklist' in err + assert "found 2 matches; saved identifiers to picklist" in err def test_tax_grep_search_shew_out(runtmp): # test 'tax grep Shew', save result to a file - taxfile = utils.get_test_data('tax/test.taxonomy.csv') + taxfile = utils.get_test_data("tax/test.taxonomy.csv") - runtmp.sourmash('tax', 'grep', 'Shew', '-t', taxfile, '-o', 'pick.csv') + runtmp.sourmash("tax", "grep", "Shew", "-t", taxfile, "-o", "pick.csv") err = runtmp.last_result.err - out = Path(runtmp.output('pick.csv')).read_text() - lines = [ x.strip() for x in out.splitlines() ] - lines = [ x.split(',') for x in lines ] - assert lines[0][0] == 'ident' - assert lines[1][0] == 'GCF_000017325.1' - assert lines[2][0] == 'GCF_000021665.1' + out = Path(runtmp.output("pick.csv")).read_text() + lines = [x.strip() for x in out.splitlines()] + lines = [x.split(",") for x in lines] + assert lines[0][0] == "ident" + assert lines[1][0] == "GCF_000017325.1" + assert lines[2][0] == "GCF_000021665.1" assert len(lines) == 3 assert "searching 1 taxonomy files for 'Shew'" in err - assert 'found 2 matches; saved identifiers to picklist' in err + assert "found 2 matches; saved identifiers to picklist" in err def test_tax_grep_search_shew_sqldb_out(runtmp): # test 'tax grep Shew' on a sqldb, save result to a file - taxfile = utils.get_test_data('tax/test.taxonomy.db') + taxfile = utils.get_test_data("tax/test.taxonomy.db") - runtmp.sourmash('tax', 'grep', 'Shew', '-t', taxfile, '-o', 'pick.csv') + runtmp.sourmash("tax", "grep", "Shew", "-t", taxfile, "-o", "pick.csv") err = runtmp.last_result.err - out = Path(runtmp.output('pick.csv')).read_text() - lines = [ x.strip() for x in out.splitlines() ] - lines = [ x.split(',') for x in lines ] - assert lines[0][0] == 'ident' - assert lines[1][0] == 'GCF_000017325' - assert lines[2][0] == 'GCF_000021665' + out = Path(runtmp.output("pick.csv")).read_text() + lines = [x.strip() for x in out.splitlines()] + lines = [x.split(",") for x in lines] + assert lines[0][0] == "ident" + assert lines[1][0] == "GCF_000017325" + assert lines[2][0] == "GCF_000021665" assert len(lines) == 3 assert "searching 1 taxonomy files for 'Shew'" in err - assert 'found 2 matches; saved identifiers to picklist' in err + assert "found 2 matches; saved identifiers to picklist" in err def test_tax_grep_search_shew_lowercase(runtmp): # test 'tax grep shew' (lowercase), save result to a file - taxfile = utils.get_test_data('tax/test.taxonomy.csv') + taxfile = utils.get_test_data("tax/test.taxonomy.csv") - runtmp.sourmash('tax', 'grep', 'shew', '-t', taxfile, '-o', 'pick.csv') + runtmp.sourmash("tax", "grep", "shew", "-t", taxfile, "-o", "pick.csv") err = runtmp.last_result.err assert "searching 1 taxonomy files for 'shew'" in err - assert 'found 0 matches; saved identifiers to picklist' in err + assert "found 0 matches; saved identifiers to picklist" in err - runtmp.sourmash('tax', 'grep', '-i', 'shew', - '-t', taxfile, '-o', 'pick.csv') + runtmp.sourmash("tax", "grep", "-i", "shew", "-t", taxfile, "-o", "pick.csv") err = 
runtmp.last_result.err assert "searching 1 taxonomy files for 'shew'" in err - assert 'found 2 matches; saved identifiers to picklist' in err - - out = Path(runtmp.output('pick.csv')).read_text() - lines = [ x.strip() for x in out.splitlines() ] - lines = [ x.split(',') for x in lines ] - assert lines[0][0] == 'ident' - assert lines[1][0] == 'GCF_000017325.1' - assert lines[2][0] == 'GCF_000021665.1' + assert "found 2 matches; saved identifiers to picklist" in err + + out = Path(runtmp.output("pick.csv")).read_text() + lines = [x.strip() for x in out.splitlines()] + lines = [x.split(",") for x in lines] + assert lines[0][0] == "ident" + assert lines[1][0] == "GCF_000017325.1" + assert lines[2][0] == "GCF_000021665.1" assert len(lines) == 3 def test_tax_grep_search_shew_out_use_picklist(runtmp): # test 'tax grep Shew', output to a picklist, use picklist - taxfile = utils.get_test_data('tax/test.taxonomy.csv') - dbfile = utils.get_test_data('tax/gtdb-tax-grep.sigs.zip') + taxfile = utils.get_test_data("tax/test.taxonomy.csv") + dbfile = utils.get_test_data("tax/gtdb-tax-grep.sigs.zip") - runtmp.sourmash('tax', 'grep', 'Shew', '-t', taxfile, '-o', 'pick.csv') + runtmp.sourmash("tax", "grep", "Shew", "-t", taxfile, "-o", "pick.csv") - runtmp.sourmash('sig', 'cat', dbfile, '--picklist', - 'pick.csv:ident:ident', '-o', 'pick-out.zip') + runtmp.sourmash( + "sig", "cat", dbfile, "--picklist", "pick.csv:ident:ident", "-o", "pick-out.zip" + ) all_sigs = sourmash.load_file_as_index(dbfile) assert len(all_sigs) == 3 - pick_sigs = sourmash.load_file_as_index(runtmp.output('pick-out.zip')) + pick_sigs = sourmash.load_file_as_index(runtmp.output("pick-out.zip")) assert len(pick_sigs) == 2 - names = [ ss.name.split()[0] for ss in pick_sigs.signatures() ] + names = [ss.name.split()[0] for ss in pick_sigs.signatures()] assert len(names) == 2 - assert 'GCF_000017325.1' in names - assert 'GCF_000021665.1' in names + assert "GCF_000017325.1" in names + assert "GCF_000021665.1" in names def test_tax_grep_search_shew_invert(runtmp): # test 'tax grep -v Shew' - taxfile = utils.get_test_data('tax/test.taxonomy.csv') + taxfile = utils.get_test_data("tax/test.taxonomy.csv") - runtmp.sourmash('tax', 'grep', '-v', 'Shew', '-t', taxfile) + runtmp.sourmash("tax", "grep", "-v", "Shew", "-t", taxfile) out = runtmp.last_result.out err = runtmp.last_result.err - assert "-v/--invert-match specified; returning only lineages that do not match." in err + assert ( + "-v/--invert-match specified; returning only lineages that do not match." 
in err + ) - lines = [ x.strip() for x in out.splitlines() ] - lines = [ x.split(',') for x in lines ] - assert lines[0][0] == 'ident' - assert lines[1][0] == 'GCF_001881345.1' - assert lines[2][0] == 'GCF_003471795.1' + lines = [x.strip() for x in out.splitlines()] + lines = [x.split(",") for x in lines] + assert lines[0][0] == "ident" + assert lines[1][0] == "GCF_001881345.1" + assert lines[2][0] == "GCF_003471795.1" assert len(lines) == 5 assert "searching 1 taxonomy files for 'Shew'" in err - assert 'found 4 matches; saved identifiers to picklist' in err + assert "found 4 matches; saved identifiers to picklist" in err - all_names = set([ x[0] for x in lines ]) - assert 'GCF_000017325.1' not in all_names - assert 'GCF_000021665.1' not in all_names + all_names = set([x[0] for x in lines]) + assert "GCF_000017325.1" not in all_names + assert "GCF_000021665.1" not in all_names def test_tax_grep_search_shew_invert_select_phylum(runtmp): # test 'tax grep -v Shew -r phylum' - taxfile = utils.get_test_data('tax/test.taxonomy.csv') + taxfile = utils.get_test_data("tax/test.taxonomy.csv") - runtmp.sourmash('tax', 'grep', '-v', 'Shew', '-t', taxfile, '-r', 'phylum') + runtmp.sourmash("tax", "grep", "-v", "Shew", "-t", taxfile, "-r", "phylum") out = runtmp.last_result.out err = runtmp.last_result.err - assert "-v/--invert-match specified; returning only lineages that do not match." in err + assert ( + "-v/--invert-match specified; returning only lineages that do not match." in err + ) assert "limiting matches to phylum" - lines = [ x.strip() for x in out.splitlines() ] - lines = [ x.split(',') for x in lines ] - assert lines[0][0] == 'ident' + lines = [x.strip() for x in out.splitlines()] + lines = [x.split(",") for x in lines] + assert lines[0][0] == "ident" assert len(lines) == 7 assert "searching 1 taxonomy files for 'Shew'" in err - assert 'found 6 matches; saved identifiers to picklist' in err + assert "found 6 matches; saved identifiers to picklist" in err - all_names = set([ x[0] for x in lines ]) - assert 'GCF_000017325.1' in all_names - assert 'GCF_000021665.1' in all_names + all_names = set([x[0] for x in lines]) + assert "GCF_000017325.1" in all_names + assert "GCF_000021665.1" in all_names def test_tax_grep_search_shew_invert_select_bad_rank(runtmp): # test 'tax grep -v Shew -r badrank' - should fail - taxfile = utils.get_test_data('tax/test.taxonomy.csv') + taxfile = utils.get_test_data("tax/test.taxonomy.csv") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('tax', 'grep', '-v', 'Shew', '-t', taxfile, - '-r', 'badrank') + runtmp.sourmash("tax", "grep", "-v", "Shew", "-t", taxfile, "-r", "badrank") - out = runtmp.last_result.out err = runtmp.last_result.err print(err) - assert 'error: argument -r/--rank: invalid choice:' in err + assert "error: argument -r/--rank: invalid choice:" in err def test_tax_grep_search_shew_count(runtmp): # test 'tax grep Shew --count' - taxfile = utils.get_test_data('tax/test.taxonomy.csv') + taxfile = utils.get_test_data("tax/test.taxonomy.csv") - runtmp.sourmash('tax', 'grep', 'Shew', '-t', taxfile, '-c') + runtmp.sourmash("tax", "grep", "Shew", "-t", taxfile, "-c") out = runtmp.last_result.out err = runtmp.last_result.err @@ -3327,19 +5176,17 @@ def test_tax_grep_search_shew_count(runtmp): assert not out.strip() assert "searching 1 taxonomy files for 'Shew'" in err - assert not 'found 2 matches; saved identifiers to picklist' in err + assert "found 2 matches; saved identifiers to picklist" not in err def test_tax_grep_multiple_csv(runtmp): # 
grep on multiple CSVs - tax1 = utils.get_test_data('tax/test.taxonomy.csv') - tax2 = utils.get_test_data('tax/protozoa_genbank_lineage.csv') + tax1 = utils.get_test_data("tax/test.taxonomy.csv") + tax2 = utils.get_test_data("tax/protozoa_genbank_lineage.csv") - taxout = runtmp.output('out.csv') + taxout = runtmp.output("out.csv") - runtmp.sourmash('tax', 'grep', "Toxo|Gamma", - '-t', tax1, tax2, - '-o', taxout) + runtmp.sourmash("tax", "grep", "Toxo|Gamma", "-t", tax1, tax2, "-o", taxout) out = runtmp.last_result.out err = runtmp.last_result.err @@ -3350,27 +5197,37 @@ def test_tax_grep_multiple_csv(runtmp): lines = Path(taxout).read_text().splitlines() assert len(lines) == 5 - names = set([ x.split(',')[0] for x in lines ]) - assert 'GCA_000256725' in names - assert 'GCF_000017325.1' in names - assert 'GCF_000021665.1' in names - assert 'GCF_001881345.1' in names + names = set([x.split(",")[0] for x in lines]) + assert "GCA_000256725" in names + assert "GCF_000017325.1" in names + assert "GCF_000021665.1" in names + assert "GCF_001881345.1" in names def test_tax_grep_multiple_csv_empty_force(runtmp): # grep on multiple CSVs, one empty, with --force - tax1 = utils.get_test_data('tax/test.taxonomy.csv') - tax2 = utils.get_test_data('tax/protozoa_genbank_lineage.csv') - tax_empty = runtmp.output('t.csv') + tax1 = utils.get_test_data("tax/test.taxonomy.csv") + tax2 = utils.get_test_data("tax/protozoa_genbank_lineage.csv") + tax_empty = runtmp.output("t.csv") - taxout = runtmp.output('out.csv') + taxout = runtmp.output("out.csv") with open(tax_empty, "w") as fp: fp.write("") print("t_csv: ", tax_empty) - runtmp.sourmash('tax', 'grep', "Toxo|Gamma", - '-t', tax1, tax2, '-t', tax_empty, - '-o', taxout, '--force') + runtmp.sourmash( + "tax", + "grep", + "Toxo|Gamma", + "-t", + tax1, + tax2, + "-t", + tax_empty, + "-o", + taxout, + "--force", + ) out = runtmp.last_result.out err = runtmp.last_result.err @@ -3381,22 +5238,20 @@ def test_tax_grep_multiple_csv_empty_force(runtmp): lines = Path(taxout).read_text().splitlines() assert len(lines) == 5 - names = set([ x.split(',')[0] for x in lines ]) - assert 'GCA_000256725' in names - assert 'GCF_000017325.1' in names - assert 'GCF_000021665.1' in names - assert 'GCF_001881345.1' in names + names = set([x.split(",")[0] for x in lines]) + assert "GCA_000256725" in names + assert "GCF_000017325.1" in names + assert "GCF_000021665.1" in names + assert "GCF_001881345.1" in names def test_tax_grep_duplicate_csv(runtmp): # grep on duplicates => should collapse to uniques on identifiers - tax1 = utils.get_test_data('tax/test.taxonomy.csv') + tax1 = utils.get_test_data("tax/test.taxonomy.csv") - taxout = runtmp.output('out.csv') + taxout = runtmp.output("out.csv") - runtmp.sourmash('tax', 'grep', "Gamma", - '-t', tax1, tax1, - '-o', taxout) + runtmp.sourmash("tax", "grep", "Gamma", "-t", tax1, tax1, "-o", taxout) out = runtmp.last_result.out err = runtmp.last_result.err @@ -3407,20 +5262,19 @@ def test_tax_grep_duplicate_csv(runtmp): lines = Path(taxout).read_text().splitlines() assert len(lines) == 4 - names = set([ x.split(',')[0] for x in lines ]) - assert 'GCF_000017325.1' in names - assert 'GCF_000021665.1' in names - assert 'GCF_001881345.1' in names + names = set([x.split(",")[0] for x in lines]) + assert "GCF_000017325.1" in names + assert "GCF_000021665.1" in names + assert "GCF_001881345.1" in names def test_tax_summarize(runtmp): # test basic operation with summarize - taxfile = utils.get_test_data('tax/test.taxonomy.csv') + taxfile = 
utils.get_test_data("tax/test.taxonomy.csv") - runtmp.sourmash('tax', 'summarize', taxfile) + runtmp.sourmash("tax", "summarize", taxfile) out = runtmp.last_result.out - err = runtmp.last_result.err assert "number of distinct taxonomic lineages: 6" in out assert "rank superkingdom: 1 distinct taxonomic lineages" in out @@ -3434,13 +5288,12 @@ def test_tax_summarize(runtmp): def test_tax_summarize_multiple(runtmp): # test basic operation with summarize on multiple files - tax1 = utils.get_test_data('tax/bacteria_refseq_lineage.csv') - tax2 = utils.get_test_data('tax/protozoa_genbank_lineage.csv') + tax1 = utils.get_test_data("tax/bacteria_refseq_lineage.csv") + tax2 = utils.get_test_data("tax/protozoa_genbank_lineage.csv") - runtmp.sourmash('tax', 'summarize', tax1, tax2) + runtmp.sourmash("tax", "summarize", tax1, tax2) out = runtmp.last_result.out - err = runtmp.last_result.err assert "number of distinct taxonomic lineages: 6" in out assert "rank superkingdom: 2 distinct taxonomic lineages" in out @@ -3454,12 +5307,11 @@ def test_tax_summarize_multiple(runtmp): def test_tax_summarize_empty_line(runtmp): # test basic operation with summarize on a file w/empty line - taxfile = utils.get_test_data('tax/test-empty-line.taxonomy.csv') + taxfile = utils.get_test_data("tax/test-empty-line.taxonomy.csv") - runtmp.sourmash('tax', 'summarize', taxfile) + runtmp.sourmash("tax", "summarize", taxfile) out = runtmp.last_result.out - err = runtmp.last_result.err assert "number of distinct taxonomic lineages: 6" in out assert "rank superkingdom: 1 distinct taxonomic lineages" in out @@ -3473,21 +5325,20 @@ def test_tax_summarize_empty_line(runtmp): def test_tax_summarize_empty(runtmp): # test failure on empty file - taxfile = runtmp.output('no-exist') + taxfile = runtmp.output("no-exist") with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('tax', 'summarize', taxfile) + runtmp.sourmash("tax", "summarize", taxfile) - out = runtmp.last_result.out err = runtmp.last_result.err assert "ERROR while loading taxonomies" in err def test_tax_summarize_csv(runtmp): # test basic operation w/csv output - taxfile = utils.get_test_data('tax/test.taxonomy.csv') + taxfile = utils.get_test_data("tax/test.taxonomy.csv") - runtmp.sourmash('tax', 'summarize', taxfile, '-o', 'ranks.csv') + runtmp.sourmash("tax", "summarize", taxfile, "-o", "ranks.csv") out = runtmp.last_result.out err = runtmp.last_result.err @@ -3495,28 +5346,30 @@ def test_tax_summarize_csv(runtmp): assert "number of distinct taxonomic lineages: 6" in out assert "saved 18 lineage counts to 'ranks.csv'" in err - csv_out = runtmp.output('ranks.csv') + csv_out = runtmp.output("ranks.csv") with sourmash_args.FileInputCSV(csv_out) as r: # count number across ranks as a cheap consistency check c = Counter() for row in r: - val = row['lineage_count'] + val = row["lineage_count"] c[val] += 1 - assert c['3'] == 7 - assert c['2'] == 5 - assert c['1'] == 5 + assert c["3"] == 7 + assert c["2"] == 5 + assert c["1"] == 5 def test_tax_summarize_on_annotate(runtmp): # test summarize on output of annotate basics - g_csv = utils.get_test_data('tax/test1.gather.csv') - tax = utils.get_test_data('tax/test.taxonomy.csv') + g_csv = utils.get_test_data("tax/test1.gather.csv") + tax = utils.get_test_data("tax/test.taxonomy.csv") csvout = runtmp.output("test1.gather.with-lineages.csv") out_dir = os.path.dirname(csvout) - runtmp.run_sourmash('tax', 'annotate', '--gather-csv', g_csv, '--taxonomy-csv', tax, '-o', out_dir) + runtmp.run_sourmash( + "tax", "annotate", 
"--gather-csv", g_csv, "--taxonomy-csv", tax, "-o", out_dir + ) print(runtmp.last_result.status) print(runtmp.last_result.out) @@ -3527,7 +5380,7 @@ def test_tax_summarize_on_annotate(runtmp): # so far so good - now see if we can run summarize! - runtmp.run_sourmash('tax', 'summarize', csvout) + runtmp.run_sourmash("tax", "summarize", csvout) out = runtmp.last_result.out err = runtmp.last_result.err @@ -3546,9 +5399,9 @@ def test_tax_summarize_on_annotate(runtmp): def test_tax_summarize_strain_csv(runtmp): # test basic operation w/csv output on taxonomy with strains - taxfile = utils.get_test_data('tax/test-strain.taxonomy.csv') + taxfile = utils.get_test_data("tax/test-strain.taxonomy.csv") - runtmp.sourmash('tax', 'summarize', taxfile, '-o', 'ranks.csv') + runtmp.sourmash("tax", "summarize", taxfile, "-o", "ranks.csv") out = runtmp.last_result.out err = runtmp.last_result.err @@ -3556,38 +5409,38 @@ def test_tax_summarize_strain_csv(runtmp): assert "number of distinct taxonomic lineages: 6" in out assert "saved 24 lineage counts to 'ranks.csv'" in err - csv_out = runtmp.output('ranks.csv') + csv_out = runtmp.output("ranks.csv") with sourmash_args.FileInputCSV(csv_out) as r: # count number across ranks as a cheap consistency check c = Counter() for row in r: print(row) - val = row['lineage_count'] + val = row["lineage_count"] c[val] += 1 print(list(c.most_common())) - assert c['3'] == 7 - assert c['2'] == 5 - assert c['6'] == 1 - assert c['1'] == 11 + assert c["3"] == 7 + assert c["2"] == 5 + assert c["6"] == 1 + assert c["1"] == 11 def test_tax_summarize_strain_csv_with_lineages(runtmp): # test basic operation w/csv output on lineages-style file w/strain csv - taxfile = utils.get_test_data('tax/test-strain.taxonomy.csv') - lineage_csv = runtmp.output('lin-with-strains.csv') + taxfile = utils.get_test_data("tax/test-strain.taxonomy.csv") + lineage_csv = runtmp.output("lin-with-strains.csv") taxdb = tax_utils.LineageDB.load(taxfile) - with open(lineage_csv, 'w', newline="") as fp: + with open(lineage_csv, "w", newline="") as fp: w = csv.writer(fp) - w.writerow(['name', 'lineage']) + w.writerow(["name", "lineage"]) for k, v in taxdb.items(): linstr = lca_utils.display_lineage(v) w.writerow([k, linstr]) - runtmp.sourmash('tax', 'summarize', lineage_csv, '-o', 'ranks.csv') + runtmp.sourmash("tax", "summarize", lineage_csv, "-o", "ranks.csv") out = runtmp.last_result.out err = runtmp.last_result.err @@ -3595,40 +5448,40 @@ def test_tax_summarize_strain_csv_with_lineages(runtmp): assert "number of distinct taxonomic lineages: 6" in out assert "saved 24 lineage counts to" in err - csv_out = runtmp.output('ranks.csv') + csv_out = runtmp.output("ranks.csv") with sourmash_args.FileInputCSV(csv_out) as r: # count number across ranks as a cheap consistency check c = Counter() for row in r: print(row) - val = row['lineage_count'] + val = row["lineage_count"] c[val] += 1 print(list(c.most_common())) - assert c['3'] == 7 - assert c['2'] == 5 - assert c['6'] == 1 - assert c['1'] == 11 + assert c["3"] == 7 + assert c["2"] == 5 + assert c["6"] == 1 + assert c["1"] == 11 def test_tax_summarize_LINS(runtmp): # test basic operation w/LINs - taxfile = utils.get_test_data('tax/test.LIN-taxonomy.csv') - lineage_csv = runtmp.output('annotated-lin.csv') + taxfile = utils.get_test_data("tax/test.LIN-taxonomy.csv") + lineage_csv = runtmp.output("annotated-lin.csv") taxdb = tax_utils.LineageDB.load(taxfile, lins=True) - with open(lineage_csv, 'w', newline="") as fp: + with open(lineage_csv, "w", newline="") as fp: w 
= csv.writer(fp) - w.writerow(['name', 'lineage']) + w.writerow(["name", "lineage"]) for k, v in taxdb.items(): lin = tax_utils.LINLineageInfo(lineage=v) linstr = lin.display_lineage(truncate_empty=False) print(linstr) w.writerow([k, linstr]) - runtmp.sourmash('tax', 'summarize', lineage_csv, '-o', 'ranks.csv', '--lins') + runtmp.sourmash("tax", "summarize", lineage_csv, "-o", "ranks.csv", "--lins") out = runtmp.last_result.out err = runtmp.last_result.err @@ -3639,137 +5492,209 @@ def test_tax_summarize_LINS(runtmp): assert "number of distinct taxonomic lineages: 6" in out assert "saved 91 lineage counts to" in err - csv_out = runtmp.output('ranks.csv') + csv_out = runtmp.output("ranks.csv") with sourmash_args.FileInputCSV(csv_out) as r: - # count number across ranks as a cheap consistency check + # count number across ranks as a cheap consistency check c = Counter() for row in r: print(row) - val = row['lineage_count'] + val = row["lineage_count"] c[val] += 1 print(list(c.most_common())) - assert c['1'] == 77 - assert c['2'] == 1 - assert c['3'] == 11 - assert c['4'] == 2 + assert c["1"] == 77 + assert c["2"] == 1 + assert c["3"] == 11 + assert c["4"] == 2 def test_metagenome_LIN(runtmp): # test basic metagenome with LIN taxonomy c = runtmp - g_csv = utils.get_test_data('tax/test1.gather.csv') - tax = utils.get_test_data('tax/test.LIN-taxonomy.csv') + g_csv = utils.get_test_data("tax/test1.gather.csv") + tax = utils.get_test_data("tax/test.LIN-taxonomy.csv") - c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax, '--lins') + c.run_sourmash("tax", "metagenome", "-g", g_csv, "--taxonomy-csv", tax, "--lins") print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert 'query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank' in c.last_result.out + assert ( + "query_name,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank" + in c.last_result.out + ) # 0th rank/position assert "test1,0,0.089,1,md5,test1.sig,0.057,444000,0.925,0" in c.last_result.out assert "test1,0,0.088,0,md5,test1.sig,0.058,442000,0.925,0" in c.last_result.out assert "test1,0,0.028,2,md5,test1.sig,0.016,138000,0.891,0" in c.last_result.out - assert "test1,0,0.796,unclassified,md5,test1.sig,0.869,3990000,,0" in c.last_result.out + assert ( + "test1,0,0.796,unclassified,md5,test1.sig,0.869,3990000,,0" in c.last_result.out + ) # 1st rank/position assert "test1,1,0.089,1;0,md5,test1.sig,0.057,444000,0.925,0" in c.last_result.out assert "test1,1,0.088,0;0,md5,test1.sig,0.058,442000,0.925,0" in c.last_result.out assert "test1,1,0.028,2;0,md5,test1.sig,0.016,138000,0.891,0" in c.last_result.out - assert "test1,1,0.796,unclassified,md5,test1.sig,0.869,3990000,,0" in c.last_result.out + assert ( + "test1,1,0.796,unclassified,md5,test1.sig,0.869,3990000,,0" in c.last_result.out + ) # 2nd rank/position assert "test1,2,0.088,0;0;0,md5,test1.sig,0.058,442000,0.925,0" in c.last_result.out assert "test1,2,0.078,1;0;0,md5,test1.sig,0.050,390000,0.921,0" in c.last_result.out assert "test1,2,0.028,2;0;0,md5,test1.sig,0.016,138000,0.891,0" in c.last_result.out assert "test1,2,0.011,1;0;1,md5,test1.sig,0.007,54000,0.864,0" in c.last_result.out - assert "test1,2,0.796,unclassified,md5,test1.sig,0.869,3990000,,0" in c.last_result.out + assert ( + "test1,2,0.796,unclassified,md5,test1.sig,0.869,3990000,,0" in c.last_result.out + ) # 19th rank/position - assert 
"test1,19,0.088,0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,md5,test1.sig,0.058,442000,0.925,0" in c.last_result.out - assert "test1,19,0.078,1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,md5,test1.sig,0.050,390000,0.921,0" in c.last_result.out - assert "test1,19,0.028,2;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,md5,test1.sig,0.016,138000,0.891,0" in c.last_result.out - assert "test1,19,0.011,1;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,md5,test1.sig,0.007,54000,0.864,0" in c.last_result.out - assert "test1,19,0.796,unclassified,md5,test1.sig,0.869,3990000,,0" in c.last_result.out + assert ( + "test1,19,0.088,0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,md5,test1.sig,0.058,442000,0.925,0" + in c.last_result.out + ) + assert ( + "test1,19,0.078,1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,md5,test1.sig,0.050,390000,0.921,0" + in c.last_result.out + ) + assert ( + "test1,19,0.028,2;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,md5,test1.sig,0.016,138000,0.891,0" + in c.last_result.out + ) + assert ( + "test1,19,0.011,1;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,md5,test1.sig,0.007,54000,0.864,0" + in c.last_result.out + ) + assert ( + "test1,19,0.796,unclassified,md5,test1.sig,0.869,3990000,,0" + in c.last_result.out + ) def test_metagenome_LIN_lingroups(runtmp): # test lingroups output c = runtmp - g_csv = utils.get_test_data('tax/test1.gather.v450.csv') - tax = utils.get_test_data('tax/test.LIN-taxonomy.csv') + g_csv = utils.get_test_data("tax/test1.gather.v450.csv") + tax = utils.get_test_data("tax/test.LIN-taxonomy.csv") lg_file = runtmp.output("test.lg.csv") - with open(lg_file, 'w') as out: - out.write('lin,name\n') - out.write('0;0;0,lg1\n') - out.write('1;0;0,lg2\n') - out.write('2;0;0,lg3\n') - out.write('1;0;1,lg3\n') + with open(lg_file, "w") as out: + out.write("lin,name\n") + out.write("0;0;0,lg1\n") + out.write("1;0;0,lg2\n") + out.write("2;0;0,lg3\n") + out.write("1;0;1,lg3\n") # write a 19 so we can check the end - out.write('1;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,lg4\n') - - c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax, - '--lins', '--lingroup', lg_file) + out.write("1;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0,lg4\n") + + c.run_sourmash( + "tax", + "metagenome", + "-g", + g_csv, + "--taxonomy-csv", + tax, + "--lins", + "--lingroup", + lg_file, + ) print(c.last_result.status) print(c.last_result.out) print(c.last_result.err) assert c.last_result.status == 0 - assert "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0" in c.last_result.err - assert "Read 5 lingroup rows and found 5 distinct lingroup prefixes." in c.last_result.err + assert ( + "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0" + in c.last_result.err + ) + assert ( + "Read 5 lingroup rows and found 5 distinct lingroup prefixes." 
+        in c.last_result.err
+    )
     assert "name lin percent_containment num_bp_contained" in c.last_result.out
     assert "lg1 0;0;0 5.82 714000" in c.last_result.out
     assert "lg2 1;0;0 5.05 620000" in c.last_result.out
     assert "lg3 2;0;0 1.56 192000" in c.last_result.out
     assert "lg3 1;0;1 0.65 80000" in c.last_result.out
-    assert "lg4 1;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0 0.65 80000" in c.last_result.out
+    assert (
+        "lg4 1;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0 0.65 80000"
+        in c.last_result.out
+    )
 
 
 def test_metagenome_LIN_human_summary_no_lin_position(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.LIN-taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.LIN-taxonomy.csv")
 
-    c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax,
-                   '--lins', '-F', "human")
+    c.run_sourmash(
+        "tax", "metagenome", "-g", g_csv, "--taxonomy-csv", tax, "--lins", "-F", "human"
+    )
 
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)
 
     assert c.last_result.status == 0
-    assert "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0" in c.last_result.err
+    assert (
+        "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0"
+        in c.last_result.err
+    )
     assert "sample name proportion cANI lineage" in c.last_result.out
     assert "----------- ---------- ---- -------" in c.last_result.out
     assert "test1 86.9% - unclassified" in c.last_result.out
-    assert "test1 5.8% 92.5% 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0" in c.last_result.out
-    assert "test1 5.0% 92.1% 1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0" in c.last_result.out
-    assert "test1 1.6% 89.1% 2;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0" in c.last_result.out
-    assert "test1 0.7% 86.4% 1;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0" in c.last_result.out
+    assert (
+        "test1 5.8% 92.5% 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0"
+        in c.last_result.out
+    )
+    assert (
+        "test1 5.0% 92.1% 1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0"
+        in c.last_result.out
+    )
+    assert (
+        "test1 1.6% 89.1% 2;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0"
+        in c.last_result.out
+    )
+    assert (
+        "test1 0.7% 86.4% 1;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0"
+        in c.last_result.out
+    )
 
 
 def test_metagenome_LIN_human_summary_lin_position_5(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.LIN-taxonomy.csv')
-
-    c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax,
-                   '--lins', '-F', "human", '--lin-position', '5')
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.LIN-taxonomy.csv")
+
+    c.run_sourmash(
+        "tax",
+        "metagenome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "--lins",
+        "-F",
+        "human",
+        "--lin-position",
+        "5",
+    )
 
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)
 
     assert c.last_result.status == 0
-    assert "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0" in c.last_result.err
+    assert (
+        "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0"
+        in c.last_result.err
+    )
     assert "sample name proportion cANI lineage" in c.last_result.out
     assert "----------- ---------- ---- -------" in c.last_result.out
     assert "test1 86.9% - unclassified" in c.last_result.out
@@ -3782,155 +5707,274 @@ def test_metagenome_LIN_human_summary_lin_position_5(runtmp):
 
 def test_metagenome_LIN_krona_lin_position_5(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.LIN-taxonomy.csv')
-
-    c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax,
-                   '--lins', '-F', "krona", '--lin-position', '5')
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.LIN-taxonomy.csv")
+
+    c.run_sourmash(
+        "tax",
+        "metagenome",
+        "-g",
+        g_csv,
+        "--taxonomy-csv",
+        tax,
+        "--lins",
+        "-F",
+        "krona",
+        "--lin-position",
+        "5",
+    )
 
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)
 
     assert c.last_result.status == 0
-    assert "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0" in c.last_result.err
+    assert (
+        "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0"
+        in c.last_result.err
+    )
     assert "fraction 0 1 2 3 4 5" in c.last_result.out
     assert "0.08815317112086159 0 0 0 0 0 0" in c.last_result.out
     assert "0.07778220981252493 1 0 0 0 0 0" in c.last_result.out
     assert "0.027522935779816515 2 0 0 0 0 0" in c.last_result.out
     assert "0.010769844435580374 1 0 1 0 0 0" in c.last_result.out
-    assert "0.7957718388512166 unclassified unclassified unclassified unclassified unclassified unclassified" in c.last_result.out
+    assert (
+        "0.7957718388512166 unclassified unclassified unclassified unclassified unclassified unclassified"
+        in c.last_result.out
+    )
 
 
 def test_metagenome_LIN_krona_bad_rank(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.LIN-taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.LIN-taxonomy.csv")
 
     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax,
-                       '--lins', '-F', "krona", '--lin-position', 'strain')
+        c.run_sourmash(
+            "tax",
+            "metagenome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "--lins",
+            "-F",
+            "krona",
+            "--lin-position",
+            "strain",
+        )
 
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)
 
     assert c.last_result.status != 0
-    assert "Invalid '--rank'/'--position' input: 'strain'. '--lins' is specified. Rank must be an integer corresponding to a LIN position." in c.last_result.err
-
+    assert (
+        "Invalid '--rank'/'--position' input: 'strain'. '--lins' is specified. Rank must be an integer corresponding to a LIN position."
+        in c.last_result.err
+    )
 
 
 def test_metagenome_LIN_lingroups_empty_lg_file(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.LIN-taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.LIN-taxonomy.csv")
 
     lg_file = runtmp.output("test.lg.csv")
-    with open(lg_file, 'w') as out:
+    with open(lg_file, "w") as out:
         out.write("")
 
     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax,
-                       '--lins', '--lingroup', lg_file)
+        c.run_sourmash(
+            "tax",
+            "metagenome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "--lins",
+            "--lingroup",
+            lg_file,
+        )
 
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)
 
     assert c.last_result.status != 0
-    assert "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0" in c.last_result.err
-    assert f"Cannot read lingroups from '{lg_file}'. Is file empty?" in c.last_result.err
+    assert (
+        "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0"
+        in c.last_result.err
+    )
+    assert (
+        f"Cannot read lingroups from '{lg_file}'. Is file empty?" in c.last_result.err
+    )
 
 
 def test_metagenome_LIN_lingroups_bad_cli_inputs(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.LIN-taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.LIN-taxonomy.csv")
 
     lg_file = runtmp.output("test.lg.csv")
-    with open(lg_file, 'w') as out:
+    with open(lg_file, "w") as out:
         out.write("")
 
     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax,
-                       '--lins', '-F', "lingroup")
+        c.run_sourmash(
+            "tax",
+            "metagenome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "--lins",
+            "-F",
+            "lingroup",
+        )
 
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)
 
     assert c.last_result.status != 0
-    assert "Must provide lingroup csv via '--lingroup' in order to output a lingroup report." in c.last_result.err
+    assert (
+        "Must provide lingroup csv via '--lingroup' in order to output a lingroup report."
+        in c.last_result.err
+    )
 
     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax, '-F', "lingroup")
+        c.run_sourmash(
+            "tax", "metagenome", "-g", g_csv, "--taxonomy-csv", tax, "-F", "lingroup"
+        )
 
     print(c.last_result.err)
     assert c.last_result.status != 0
-    assert "Must enable LIN taxonomy via '--lins' in order to use lingroups." in c.last_result.err
+    assert (
+        "Must enable LIN taxonomy via '--lins' in order to use lingroups."
+        in c.last_result.err
+    )
 
     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax, '--lingroup', lg_file)
+        c.run_sourmash(
+            "tax",
+            "metagenome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "--lingroup",
+            lg_file,
+        )
 
     print(c.last_result.err)
     assert c.last_result.status != 0
 
     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax, '--lins', '-F', 'bioboxes')
+        c.run_sourmash(
+            "tax",
+            "metagenome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "--lins",
+            "-F",
+            "bioboxes",
+        )
 
     print(c.last_result.err)
     assert c.last_result.status != 0
-    assert "ERROR: The following outputs are incompatible with '--lins': : bioboxes, kreport" in c.last_result.err
+    assert (
+        "ERROR: The following outputs are incompatible with '--lins': : bioboxes, kreport"
+        in c.last_result.err
+    )
 
 
 def test_metagenome_mult_outputs_stdout_fail(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.LIN-taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.LIN-taxonomy.csv")
 
     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax,
-                       '-F', "kreport", 'csv_summary')
+        c.run_sourmash(
+            "tax",
+            "metagenome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "-F",
+            "kreport",
+            "csv_summary",
+        )
 
     print(c.last_result.err)
     assert c.last_result.status != 0
-    assert f"Writing to stdout is incompatible with multiple output formats ['kreport', 'csv_summary']" in c.last_result.err
+    assert (
+        "Writing to stdout is incompatible with multiple output formats ['kreport', 'csv_summary']"
+        in c.last_result.err
+    )
 
 
 def test_genome_mult_outputs_stdout_fail(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.LIN-taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.LIN-taxonomy.csv")
 
     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('tax', 'genome', '-g', g_csv, '--taxonomy-csv', tax,
-                       '-F', "lineage_csv", 'csv_summary')
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "-F",
+            "lineage_csv",
+            "csv_summary",
+        )
 
     print(c.last_result.err)
     assert c.last_result.status != 0
-    assert f"Writing to stdout is incompatible with multiple output formats ['lineage_csv', 'csv_summary']" in c.last_result.err
+    assert (
+        "Writing to stdout is incompatible with multiple output formats ['lineage_csv', 'csv_summary']"
+        in c.last_result.err
+    )
 
 
 def test_metagenome_LIN_lingroups_lg_only_header(runtmp):
     c = runtmp
-    g_csv = utils.get_test_data('tax/test1.gather.v450.csv')
-    tax = utils.get_test_data('tax/test.LIN-taxonomy.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.v450.csv")
+    tax = utils.get_test_data("tax/test.LIN-taxonomy.csv")
 
     lg_file = runtmp.output("test.lg.csv")
-    with open(lg_file, 'w') as out:
-        out.write('lin,name\n')
+    with open(lg_file, "w") as out:
+        out.write("lin,name\n")
 
     with pytest.raises(SourmashCommandFailed):
-        c.run_sourmash('tax', 'metagenome', '-g', g_csv, '--taxonomy-csv', tax,
-                       '--lins', '--lingroup', lg_file)
+        c.run_sourmash(
+            "tax",
+            "metagenome",
+            "-g",
+            g_csv,
+            "--taxonomy-csv",
+            tax,
+            "--lins",
+            "--lingroup",
+            lg_file,
+        )
 
     print(c.last_result.status)
     print(c.last_result.out)
     print(c.last_result.err)
 
     assert c.last_result.status != 0
-    assert "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0" in c.last_result.err
+    assert (
+        "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0"
+        in c.last_result.err
+    )
     assert f"No lingroups loaded from {lg_file}" in c.last_result.err
diff --git a/tests/test_tax_utils.py b/tests/test_tax_utils.py
index 00344ec0d0..a362984532 100644
--- a/tests/test_tax_utils.py
+++ b/tests/test_tax_utils.py
@@ -11,20 +11,40 @@
 import sourmash_tst_utils as utils
 
-from sourmash.tax.tax_utils import (ascending_taxlist, get_ident, load_gather_results,
-                                    collect_gather_csvs, check_and_load_gather_csvs,
-                                    LineagePair, QueryInfo, GatherRow, TaxResult, QueryTaxResult,
-                                    SummarizedGatherResult, ClassificationResult, AnnotateTaxResult,
-                                    BaseLineageInfo, RankLineageInfo, LINLineageInfo,
-                                    aggregate_by_lineage_at_rank, format_for_krona,
-                                    write_krona, write_lineage_sample_frac, read_lingroups,
-                                    LineageTree, LineageDB, LineageDB_Sqlite, MultiLineageDB)
+from sourmash.tax.tax_utils import (
+    ascending_taxlist,
+    get_ident,
+    load_gather_results,
+    collect_gather_csvs,
+    check_and_load_gather_csvs,
+    LineagePair,
+    QueryInfo,
+    GatherRow,
+    TaxResult,
+    QueryTaxResult,
+    SummarizedGatherResult,
+    ClassificationResult,
+    AnnotateTaxResult,
+    BaseLineageInfo,
+    RankLineageInfo,
+    LINLineageInfo,
+    aggregate_by_lineage_at_rank,
+    format_for_krona,
+    write_krona,
+    write_lineage_sample_frac,
+    read_lingroups,
+    LineageTree,
+    LineageDB,
+    LineageDB_Sqlite,
+    MultiLineageDB,
+)
+
 
 # utility functions for testing
 def make_mini_taxonomy(tax_info, LIN=False):
-    #pass in list of tuples: (name, lineage)
+    # pass in list of tuples: (name, lineage)
     taxD = {}
-    for (name, lin) in tax_info:
+    for name, lin in tax_info:
         if LIN:
             lineage = LINLineageInfo(lineage_str=lin)
         else:
@@ -32,15 +52,16 @@ def make_mini_taxonomy(tax_info, LIN=False):
         taxD[name] = lineage.filled_lineage
     return taxD
 
+
 def make_mini_taxonomy_with_taxids(tax_info, LIN=False):
     taxD = {}
-    for (name, lin, taxids) in tax_info:
+    for name, lin, taxids in tax_info:
         if LIN:
             lineage = LINLineageInfo(lineage_str=lin)
         else:
             ranks = RankLineageInfo.ranks
-            txs = taxids.split(';')
-            lns = lin.split(';')
+            txs = taxids.split(";")
+            lns = lin.split(";")
             lineage_tups = []
             for n, taxname in enumerate(lns):
                 rk = ranks[n]
@@ -51,20 +72,23 @@ def make_mini_taxonomy_with_taxids(tax_info, LIN=False):
         taxD[name] = lineage.filled_lineage
     return taxD
 
+
 def make_GatherRow(gather_dict=None, exclude_cols=[]):
     """Load artificial gather row (dict) into GatherRow class"""
     # default contains just the essential cols
-    gatherD = {'query_name': 'q1',
-               'query_md5': 'md5',
-               'query_filename': 'query_fn',
-               'name': 'gA',
-               'f_unique_weighted': 0.2,
-               'f_unique_to_query': 0.1,
-               'query_bp':100,
-               'unique_intersect_bp': 20,
-               'remaining_bp': 1,
-               'ksize': 31,
-               'scaled': 1}
+    gatherD = {
+        "query_name": "q1",
+        "query_md5": "md5",
+        "query_filename": "query_fn",
+        "name": "gA",
+        "f_unique_weighted": 0.2,
+        "f_unique_to_query": 0.1,
+        "query_bp": 100,
+        "unique_intersect_bp": 20,
+        "remaining_bp": 1,
+        "ksize": 31,
+        "scaled": 1,
+    }
     if gather_dict is not None:
         gatherD.update(gather_dict)
     for col in exclude_cols:
@@ -73,39 +97,73 @@ def make_GatherRow(gather_dict=None, exclude_cols=[]):
     return gatherRaw
 
 
-def make_TaxResult(gather_dict=None, taxD=None, keep_full_ident=False, keep_ident_version=False, skip_idents=None, LIN=False):
+def make_TaxResult(
+    gather_dict=None,
+    taxD=None,
+    keep_full_ident=False,
+    keep_ident_version=False,
+    skip_idents=None,
+    LIN=False,
+):
     """Make TaxResult from artificial gather row (dict)"""
     gRow = make_GatherRow(gather_dict)
-    taxres = TaxResult(raw=gRow, keep_full_identifiers=keep_full_ident,
-                       keep_identifier_versions=keep_ident_version, lins=LIN)
+    taxres = TaxResult(
+        raw=gRow,
+        keep_full_identifiers=keep_full_ident,
+        keep_identifier_versions=keep_ident_version,
+        lins=LIN,
+    )
     if taxD is not None:
         taxres.get_match_lineage(tax_assignments=taxD, skip_idents=skip_idents)
     return taxres
 
 
-def make_QueryTaxResults(gather_info, taxD=None, single_query=False, keep_full_ident=False, keep_ident_version=False,
-                         skip_idents=None, summarize=False, classify=False, classify_rank=None, c_thresh=0.1, ani_thresh=None,
-                         LIN=False):
+def make_QueryTaxResults(
+    gather_info,
+    taxD=None,
+    single_query=False,
+    keep_full_ident=False,
+    keep_ident_version=False,
+    skip_idents=None,
+    summarize=False,
+    classify=False,
+    classify_rank=None,
+    c_thresh=0.1,
+    ani_thresh=None,
+    LIN=False,
+):
     """Make QueryTaxResult(s) from artificial gather information, formatted as list of gather rows (dicts)"""
 
     gather_results = {}
     this_querytaxres = None
     for gather_infoD in gather_info:
-        taxres = make_TaxResult(gather_infoD, taxD=taxD, keep_full_ident=keep_full_ident,
-                                keep_ident_version=keep_ident_version, skip_idents=skip_idents, LIN=LIN)
+        taxres = make_TaxResult(
+            gather_infoD,
+            taxD=taxD,
+            keep_full_ident=keep_full_ident,
+            keep_ident_version=keep_ident_version,
+            skip_idents=skip_idents,
+            LIN=LIN,
+        )
         query_name = taxres.query_name
         # add to matching QueryTaxResult or create new one
         if not this_querytaxres or not this_querytaxres.is_compatible(taxres):
             # get existing or initialize new
-            this_querytaxres = gather_results.get(query_name, QueryTaxResult(taxres.query_info, lins=LIN))
+            this_querytaxres = gather_results.get(
+                query_name, QueryTaxResult(taxres.query_info, lins=LIN)
+            )
         this_querytaxres.add_taxresult(taxres)
-#        print('missed_ident?', taxres.missed_ident)
+        # print('missed_ident?', taxres.missed_ident)
         gather_results[query_name] = this_querytaxres
     if summarize:
         for query_name, qres in gather_results.items():
             qres.build_summarized_result()
     if classify:
         for query_name, qres in gather_results.items():
-            qres.build_classification_result(rank=classify_rank, containment_threshold=c_thresh, ani_threshold=ani_thresh)
+            qres.build_classification_result(
+                rank=classify_rank,
+                containment_threshold=c_thresh,
+                ani_threshold=ani_thresh,
+            )
     # for convenience: If working with single query, just return that QueryTaxResult.
     if single_query:
         if len(gather_results.keys()) > 1:
@@ -117,17 +175,43 @@ def make_QueryTaxResults(gather_info, taxD=None, single_query=False, keep_full_i
 ## tests
 
 
 def test_ascending_taxlist_1():
-    assert list(ascending_taxlist()) == ['strain', 'species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom']
+    assert list(ascending_taxlist()) == [
+        "strain",
+        "species",
+        "genus",
+        "family",
+        "order",
+        "class",
+        "phylum",
+        "superkingdom",
+    ]
 
 
 def test_ascending_taxlist_2():
-    assert list(ascending_taxlist(include_strain=False)) == ['species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom']
+    assert list(ascending_taxlist(include_strain=False)) == [
+        "species",
+        "genus",
+        "family",
+        "order",
+        "class",
+        "phylum",
+        "superkingdom",
+    ]
 
 
 def test_QueryInfo_basic():
     "basic functionality of QueryInfo dataclass"
-    qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp='100',query_n_hashes='10',ksize='31',scaled='10', total_weighted_hashes='200')
-    assert qInf.query_name == 'q1'
+    qInf = QueryInfo(
+        query_name="q1",
+        query_md5="md5",
+        query_filename="f1",
+        query_bp="100",
+        query_n_hashes="10",
+        ksize="31",
+        scaled="10",
+        total_weighted_hashes="200",
+    )
+    assert qInf.query_name == "q1"
     assert isinstance(qInf.query_n_hashes, int)
     assert isinstance(qInf.ksize, int)
     assert isinstance(qInf.scaled, int)
@@ -137,8 +221,15 @@ def test_QueryInfo_no_hash_info():
     "QueryInfo dataclass for older gather results without query_n_hashes or total_weighted_hashes"
-    qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp='100',ksize=31,scaled=10)
-    assert qInf.query_name == 'q1'
+    qInf = QueryInfo(
+        query_name="q1",
+        query_md5="md5",
+        query_filename="f1",
+        query_bp="100",
+        ksize=31,
+        scaled=10,
+    )
+    assert qInf.query_name == "q1"
     assert qInf.query_n_hashes == 0
     assert qInf.total_weighted_hashes == 0
     assert qInf.total_weighted_bp == 0
@@ -147,89 +238,213 @@ def test_QueryInfo_missing():
     "check that required args"
     with pytest.raises(TypeError) as exc:
-        QueryInfo(query_name='q1', query_filename='f1',query_bp='100',query_n_hashes='10',ksize=31,scaled=10, total_weighted_hashes=200)
+        QueryInfo(
+            query_name="q1",
+            query_filename="f1",
+            query_bp="100",
+            query_n_hashes="10",
+            ksize=31,
+            scaled=10,
+            total_weighted_hashes=200,
+        )
     print(str(exc))
     assert "missing 1 required positional argument: 'query_md5'" in str(exc)
 
 
 def test_SummarizedGatherResult():
     "basic functionality of SummarizedGatherResult dataclass"
-    qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp='100',
-                     query_n_hashes='10',ksize='31',scaled='10', total_weighted_hashes='200')
-    sgr = SummarizedGatherResult(rank="phylum", fraction=0.2, lineage=RankLineageInfo(lineage_str="a;b"),
-                                 f_weighted_at_rank=0.3, bp_match_at_rank=30)
+    qInf = QueryInfo(
+        query_name="q1",
+        query_md5="md5",
+        query_filename="f1",
+        query_bp="100",
+        query_n_hashes="10",
+        ksize="31",
+        scaled="10",
+        total_weighted_hashes="200",
+    )
+    sgr = SummarizedGatherResult(
+        rank="phylum",
+        fraction=0.2,
+        lineage=RankLineageInfo(lineage_str="a;b"),
+        f_weighted_at_rank=0.3,
+        bp_match_at_rank=30,
+    )
     print(sgr)
-    assert sgr.rank=='phylum'
+    assert sgr.rank == "phylum"
 
     sumD = sgr.as_summary_dict(query_info=qInf)
     print(sumD)
-    assert sumD == {'rank': 'phylum', 'fraction': "0.2", 'lineage': 'a;b', 'f_weighted_at_rank': "0.3",
-                    'bp_match_at_rank': "30", 'query_ani_at_rank': None, 'query_name': 'q1',
-                    'query_md5': 'md5', 'query_filename': 'f1', 'total_weighted_hashes': "200"}
+    assert sumD == {
+        "rank": "phylum",
+        "fraction": "0.2",
+        "lineage": "a;b",
+        "f_weighted_at_rank": "0.3",
+        "bp_match_at_rank": "30",
+        "query_ani_at_rank": None,
+        "query_name": "q1",
+        "query_md5": "md5",
+        "query_filename": "f1",
+        "total_weighted_hashes": "200",
+    }
     hD = sgr.as_human_friendly_dict(query_info=qInf)
     print(hD)
-    assert hD == {'rank': 'phylum', 'fraction': '0.200', 'lineage': 'a;b', 'f_weighted_at_rank': '30.0%',
-                  'bp_match_at_rank': "30", 'query_ani_at_rank': '-    ', 'query_name': 'q1',
-                  'query_md5': 'md5', 'query_filename': 'f1', 'total_weighted_hashes': "200"}
+    assert hD == {
+        "rank": "phylum",
+        "fraction": "0.200",
+        "lineage": "a;b",
+        "f_weighted_at_rank": "30.0%",
+        "bp_match_at_rank": "30",
+        "query_ani_at_rank": "-    ",
+        "query_name": "q1",
+        "query_md5": "md5",
+        "query_filename": "f1",
+        "total_weighted_hashes": "200",
+    }
     krD = sgr.as_kreport_dict(query_info=qInf)
     print(krD)
-    assert krD == {'ncbi_taxid': None, 'sci_name': 'b', 'rank_code': 'P', 'num_bp_assigned': "0",
-                   'percent_containment': '30.00', 'num_bp_contained': "600"}
-    lD = sgr.as_lineage_dict(ranks = RankLineageInfo().ranks, query_info=qInf)
+    assert krD == {
+        "ncbi_taxid": None,
+        "sci_name": "b",
+        "rank_code": "P",
+        "num_bp_assigned": "0",
+        "percent_containment": "30.00",
+        "num_bp_contained": "600",
+    }
+    lD = sgr.as_lineage_dict(ranks=RankLineageInfo().ranks, query_info=qInf)
     print(lD)
-    assert lD == {'ident': 'q1', 'superkingdom': 'a', 'phylum': 'b', 'class': '', 'order': '',
-                  'family': '', 'genus': '', 'species': '', 'strain': ''}
+    assert lD == {
+        "ident": "q1",
+        "superkingdom": "a",
+        "phylum": "b",
+        "class": "",
+        "order": "",
+        "family": "",
+        "genus": "",
+        "species": "",
+        "strain": "",
+    }
     cami = sgr.as_cami_bioboxes()
     print(cami)
-    assert cami == [None, 'phylum', None, 'a|b', '30.00']
+    assert cami == [None, "phylum", None, "a|b", "30.00"]
 
 
 def test_SummarizedGatherResult_withtaxids():
     "basic functionality of SummarizedGatherResult dataclass"
-    qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp='100',
-                     query_n_hashes='10',ksize='31',scaled='10', total_weighted_hashes='200')
-    lin = [LineagePair(rank='superkingdom', name='a', taxid='1'), LineagePair(rank='phylum', name='b', taxid=2)]
-    sgr = SummarizedGatherResult(rank="phylum", fraction=0.2, lineage=RankLineageInfo(lineage=lin),
-                                 f_weighted_at_rank=0.3, bp_match_at_rank=30)
+    qInf = QueryInfo(
+        query_name="q1",
+        query_md5="md5",
+        query_filename="f1",
+        query_bp="100",
+        query_n_hashes="10",
+        ksize="31",
+        scaled="10",
+        total_weighted_hashes="200",
+    )
+    lin = [
+        LineagePair(rank="superkingdom", name="a", taxid="1"),
+        LineagePair(rank="phylum", name="b", taxid=2),
+    ]
+    sgr = SummarizedGatherResult(
+        rank="phylum",
+        fraction=0.2,
+        lineage=RankLineageInfo(lineage=lin),
+        f_weighted_at_rank=0.3,
+        bp_match_at_rank=30,
+    )
     print(sgr)
-    assert sgr.rank=='phylum'
+    assert sgr.rank == "phylum"
     sumD = sgr.as_summary_dict(query_info=qInf)
     print(sumD)
-    assert sumD == {'rank': 'phylum', 'fraction': "0.2", 'lineage': 'a;b', 'f_weighted_at_rank': "0.3",
-                    'bp_match_at_rank': "30", 'query_ani_at_rank': None, 'query_name': 'q1',
-                    'query_md5': 'md5', 'query_filename': 'f1', 'total_weighted_hashes': "200"}
+    assert sumD == {
+        "rank": "phylum",
+        "fraction": "0.2",
+        "lineage": "a;b",
+        "f_weighted_at_rank": "0.3",
+        "bp_match_at_rank": "30",
+        "query_ani_at_rank": None,
+        "query_name": "q1",
+        "query_md5": "md5",
+        "query_filename": "f1",
+        "total_weighted_hashes": "200",
+    }
     hD = sgr.as_human_friendly_dict(query_info=qInf)
     print(hD)
-    assert hD == {'rank': 'phylum', 'fraction': '0.200', 'lineage': 'a;b', 'f_weighted_at_rank': '30.0%',
-                  'bp_match_at_rank': "30", 'query_ani_at_rank': '-    ', 'query_name': 'q1',
-                  'query_md5': 'md5', 'query_filename': 'f1', 'total_weighted_hashes': "200"}
+    assert hD == {
+        "rank": "phylum",
+        "fraction": "0.200",
+        "lineage": "a;b",
+        "f_weighted_at_rank": "30.0%",
+        "bp_match_at_rank": "30",
+        "query_ani_at_rank": "-    ",
+        "query_name": "q1",
+        "query_md5": "md5",
+        "query_filename": "f1",
+        "total_weighted_hashes": "200",
+    }
     krD = sgr.as_kreport_dict(query_info=qInf)
     print(krD)
-    assert krD == {'ncbi_taxid': '2', 'sci_name': 'b', 'rank_code': 'P', 'num_bp_assigned': "0",
-                   'percent_containment': '30.00', 'num_bp_contained': "600"}
-    lD = sgr.as_lineage_dict(ranks = RankLineageInfo().ranks, query_info=qInf)
+    assert krD == {
+        "ncbi_taxid": "2",
+        "sci_name": "b",
+        "rank_code": "P",
+        "num_bp_assigned": "0",
+        "percent_containment": "30.00",
+        "num_bp_contained": "600",
+    }
+    lD = sgr.as_lineage_dict(ranks=RankLineageInfo().ranks, query_info=qInf)
     print(lD)
-    assert lD == {'ident': 'q1', 'superkingdom': 'a', 'phylum': 'b', 'class': '', 'order': '',
-                  'family': '', 'genus': '', 'species': '', 'strain': ''}
+    assert lD == {
+        "ident": "q1",
+        "superkingdom": "a",
+        "phylum": "b",
+        "class": "",
+        "order": "",
+        "family": "",
+        "genus": "",
+        "species": "",
+        "strain": "",
+    }
     cami = sgr.as_cami_bioboxes()
     print(cami)
-    assert cami == ['2', 'phylum', '1|2', 'a|b', '30.00']
+    assert cami == ["2", "phylum", "1|2", "a|b", "30.00"]
 
 
 def test_SummarizedGatherResult_LINs():
     "SummarizedGatherResult with LINs"
-    qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp='100',
-                     query_n_hashes='10',ksize='31',scaled='10', total_weighted_hashes='200')
-    sgr = SummarizedGatherResult(rank="phylum", fraction=0.2, lineage=LINLineageInfo(lineage_str="0;0;1"),
-                                 f_weighted_at_rank=0.3, bp_match_at_rank=30)
+    qInf = QueryInfo(
+        query_name="q1",
+        query_md5="md5",
+        query_filename="f1",
+        query_bp="100",
+        query_n_hashes="10",
+        ksize="31",
+        scaled="10",
+        total_weighted_hashes="200",
+    )
+    sgr = SummarizedGatherResult(
+        rank="phylum",
+        fraction=0.2,
+        lineage=LINLineageInfo(lineage_str="0;0;1"),
+        f_weighted_at_rank=0.3,
+        bp_match_at_rank=30,
+    )
     lgD = sgr.as_lingroup_dict(query_info=qInf, lg_name="lg_name")
     print(lgD)
-    assert lgD == {'name': "lg_name", "lin": "0;0;1",
-                   'percent_containment': '30.00', 'num_bp_contained': "600"}
+    assert lgD == {
+        "name": "lg_name",
+        "lin": "0;0;1",
+        "percent_containment": "30.00",
+        "num_bp_contained": "600",
+    }
     lgD = sgr.as_lingroup_dict(query_info=qInf, lg_name="lg_name")
     print(lgD)
-    assert lgD == {'name': "lg_name", "lin": "0;0;1",
-                   'percent_containment': '30.00', 'num_bp_contained': "600"}
+    assert lgD == {
+        "name": "lg_name",
+        "lin": "0;0;1",
+        "percent_containment": "30.00",
+        "num_bp_contained": "600",
+    }
     with pytest.raises(ValueError) as exc:
         sgr.as_kreport_dict(query_info=qInf)
     print(str(exc))
@@ -242,164 +457,344 @@ def test_SummarizedGatherResult_set_query_ani():
     "Check ANI estimation within SummarizedGatherResult dataclass"
-    qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp='100',
-                     query_n_hashes='10',ksize='31',scaled='10', total_weighted_hashes='200')
-    sgr = SummarizedGatherResult(rank="phylum", fraction=0.2, lineage=RankLineageInfo(lineage_str="a;b"),
-                                 f_weighted_at_rank=0.3, bp_match_at_rank=30)
+    qInf = QueryInfo(
+        query_name="q1",
+        query_md5="md5",
+        query_filename="f1",
+        query_bp="100",
+        query_n_hashes="10",
+        ksize="31",
+        scaled="10",
+        total_weighted_hashes="200",
+    )
+    sgr = SummarizedGatherResult(
+        rank="phylum",
+        fraction=0.2,
+        lineage=RankLineageInfo(lineage_str="a;b"),
+        f_weighted_at_rank=0.3,
+        bp_match_at_rank=30,
+    )
     sgr.set_query_ani(query_info=qInf)
     print(sgr.query_ani_at_rank)
-    assert sgr.query_ani_at_rank == approx(0.949, rel=1e-3) 
+    assert sgr.query_ani_at_rank == approx(0.949, rel=1e-3)
 
     # ANI can be calculated with query_bp OR query_n_hashes. Remove each and check the results are identical
-    qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp='100',
-                     query_n_hashes=0,ksize='31',scaled='10', total_weighted_hashes='200')
-    sgr = SummarizedGatherResult(rank="phylum", fraction=0.2, lineage=RankLineageInfo(lineage_str="a;b"),
-                                 f_weighted_at_rank=0.3, bp_match_at_rank=30)
+    qInf = QueryInfo(
+        query_name="q1",
+        query_md5="md5",
+        query_filename="f1",
+        query_bp="100",
+        query_n_hashes=0,
+        ksize="31",
+        scaled="10",
+        total_weighted_hashes="200",
+    )
+    sgr = SummarizedGatherResult(
+        rank="phylum",
+        fraction=0.2,
+        lineage=RankLineageInfo(lineage_str="a;b"),
+        f_weighted_at_rank=0.3,
+        bp_match_at_rank=30,
+    )
     sgr.set_query_ani(query_info=qInf)
     print(sgr.query_ani_at_rank)
-    assert sgr.query_ani_at_rank == approx(0.949, rel=1e-3) 
+    assert sgr.query_ani_at_rank == approx(0.949, rel=1e-3)
 
     # try without query_bp
-    qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp=0,
-                     query_n_hashes='10',ksize='31',scaled='10', total_weighted_hashes='200')
-    sgr = SummarizedGatherResult(rank="phylum", fraction=0.2, lineage=RankLineageInfo(lineage_str="a;b"),
-                                 f_weighted_at_rank=0.3, bp_match_at_rank=30)
+    qInf = QueryInfo(
+        query_name="q1",
+        query_md5="md5",
+        query_filename="f1",
+        query_bp=0,
+        query_n_hashes="10",
+        ksize="31",
+        scaled="10",
+        total_weighted_hashes="200",
+    )
+    sgr = SummarizedGatherResult(
+        rank="phylum",
+        fraction=0.2,
+        lineage=RankLineageInfo(lineage_str="a;b"),
+        f_weighted_at_rank=0.3,
+        bp_match_at_rank=30,
+    )
     sgr.set_query_ani(query_info=qInf)
     print(sgr.query_ani_at_rank)
-    assert sgr.query_ani_at_rank == approx(0.949, rel=1e-3) 
+    assert sgr.query_ani_at_rank == approx(0.949, rel=1e-3)
 
 
 def test_SummarizedGatherResult_greater_than_1():
     "basic functionality of SummarizedGatherResult dataclass"
     # fraction > 1
     with pytest.raises(ValueError) as exc:
-        SummarizedGatherResult(rank="phylum", fraction=0.3, lineage=RankLineageInfo(lineage_str="a;b"),
-                               f_weighted_at_rank=1.2, bp_match_at_rank=30)
+        SummarizedGatherResult(
+            rank="phylum",
+            fraction=0.3,
+            lineage=RankLineageInfo(lineage_str="a;b"),
+            f_weighted_at_rank=1.2,
+            bp_match_at_rank=30,
+        )
     print(str(exc))
     assert "> 100% of the query!" in str(exc)
 
     # f_weighted > 1
     with pytest.raises(ValueError) as exc:
-        SummarizedGatherResult(rank="phylum", fraction=1.2, lineage=RankLineageInfo(lineage_str="a;b"),
-                               f_weighted_at_rank=0.3, bp_match_at_rank=30)
+        SummarizedGatherResult(
+            rank="phylum",
+            fraction=1.2,
+            lineage=RankLineageInfo(lineage_str="a;b"),
+            f_weighted_at_rank=0.3,
+            bp_match_at_rank=30,
+        )
     print(str(exc))
    assert "> 100% of the query!" in str(exc)
 
 
 def test_SummarizedGatherResult_0_fraction():
     with pytest.raises(ValueError) as exc:
-        SummarizedGatherResult(rank="phylum", fraction=-.1, lineage=RankLineageInfo(lineage_str="a;b"),
-                               f_weighted_at_rank=0.3, bp_match_at_rank=30)
+        SummarizedGatherResult(
+            rank="phylum",
+            fraction=-0.1,
+            lineage=RankLineageInfo(lineage_str="a;b"),
+            f_weighted_at_rank=0.3,
+            bp_match_at_rank=30,
+        )
     err_msg = "Summarized fraction is <=0% of the query! This should not occur."
     assert err_msg in str(exc)
-    #assert cr.status == 'nomatch'
-    
+    # assert cr.status == 'nomatch'
+
     with pytest.raises(ValueError) as exc:
-        SummarizedGatherResult(rank="phylum", fraction=.1, lineage=RankLineageInfo(lineage_str="a;b"),
-                               f_weighted_at_rank=0, bp_match_at_rank=30)
+        SummarizedGatherResult(
+            rank="phylum",
+            fraction=0.1,
+            lineage=RankLineageInfo(lineage_str="a;b"),
+            f_weighted_at_rank=0,
+            bp_match_at_rank=30,
+        )
     print(str(exc))
     assert err_msg in str(exc)
 
 
 def test_SummarizedGatherResult_species_kreport():
     "basic functionality of SummarizedGatherResult dataclass"
-    qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp='100',
-                     query_n_hashes='10',ksize='31',scaled='10', total_weighted_hashes='200')
-    sgr = SummarizedGatherResult(rank="species", fraction=0.2, lineage=RankLineageInfo(lineage_str="a;b;c;d;e;f;g"),
-                                 f_weighted_at_rank=0.3, bp_match_at_rank=30)
+    qInf = QueryInfo(
+        query_name="q1",
+        query_md5="md5",
+        query_filename="f1",
+        query_bp="100",
+        query_n_hashes="10",
+        ksize="31",
+        scaled="10",
+        total_weighted_hashes="200",
+    )
+    sgr = SummarizedGatherResult(
+        rank="species",
+        fraction=0.2,
+        lineage=RankLineageInfo(lineage_str="a;b;c;d;e;f;g"),
+        f_weighted_at_rank=0.3,
+        bp_match_at_rank=30,
+    )
     print(sgr)
-    assert sgr.rank=='species'
+    assert sgr.rank == "species"
     krD = sgr.as_kreport_dict(query_info=qInf)
     print(krD)
-    assert krD == {'ncbi_taxid': None, 'sci_name': 'g', 'rank_code': 'S', 'num_bp_assigned': "600",
-                   'percent_containment': '30.00', 'num_bp_contained': "600"}
+    assert krD == {
+        "ncbi_taxid": None,
+        "sci_name": "g",
+        "rank_code": "S",
+        "num_bp_assigned": "600",
+        "percent_containment": "30.00",
+        "num_bp_contained": "600",
+    }
 
 
 def test_SummarizedGatherResult_summary_dict_limit_float():
     "basic functionality of SummarizedGatherResult dataclass"
-    qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp='100',
-                     query_n_hashes='10',ksize='31',scaled='10', total_weighted_hashes='200')
-    sgr = SummarizedGatherResult(rank="phylum", fraction=0.123456, lineage=RankLineageInfo(lineage_str="a;b"),
-                                 f_weighted_at_rank=0.345678, bp_match_at_rank=30)
+    qInf = QueryInfo(
+        query_name="q1",
+        query_md5="md5",
+        query_filename="f1",
+        query_bp="100",
+        query_n_hashes="10",
+        ksize="31",
+        scaled="10",
+        total_weighted_hashes="200",
+    )
+    sgr = SummarizedGatherResult(
+        rank="phylum",
+        fraction=0.123456,
+        lineage=RankLineageInfo(lineage_str="a;b"),
+        f_weighted_at_rank=0.345678,
+        bp_match_at_rank=30,
+    )
     print(sgr)
-    assert sgr.rank=='phylum'
+    assert sgr.rank == "phylum"
 
     sumD = sgr.as_summary_dict(query_info=qInf)
     print(sumD)
-    assert sumD == {'rank': 'phylum', 'fraction': "0.123456", 'lineage': 'a;b', 'f_weighted_at_rank': "0.345678",
-                    'bp_match_at_rank': "30", 'query_ani_at_rank': None, 'query_name': 'q1',
-                    'query_md5': 'md5', 'query_filename': 'f1', 'total_weighted_hashes': "200"}
-
+    assert sumD == {
+        "rank": "phylum",
+        "fraction": "0.123456",
+        "lineage": "a;b",
+        "f_weighted_at_rank": "0.345678",
+        "bp_match_at_rank": "30",
+ "query_ani_at_rank": None, + "query_name": "q1", + "query_md5": "md5", + "query_filename": "f1", + "total_weighted_hashes": "200", + } + sumD = sgr.as_summary_dict(query_info=qInf, limit_float=True) print(sumD) - assert sumD == {'rank': 'phylum', 'fraction': "0.123", 'lineage': 'a;b', 'f_weighted_at_rank': "0.346", - 'bp_match_at_rank': "30", 'query_ani_at_rank': None, 'query_name': 'q1', - 'query_md5': 'md5', 'query_filename': 'f1', 'total_weighted_hashes': "200"} + assert sumD == { + "rank": "phylum", + "fraction": "0.123", + "lineage": "a;b", + "f_weighted_at_rank": "0.346", + "bp_match_at_rank": "30", + "query_ani_at_rank": None, + "query_name": "q1", + "query_md5": "md5", + "query_filename": "f1", + "total_weighted_hashes": "200", + } def test_ClassificationResult(): "basic functionality of ClassificationResult dataclass" - qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp='100', - query_n_hashes='10',ksize='31',scaled='10', total_weighted_hashes='200') - cr = ClassificationResult(rank="phylum", fraction=0.2, lineage=RankLineageInfo(lineage_str="a;b"), - f_weighted_at_rank=0.3, bp_match_at_rank=30, query_ani_at_rank=0.97) + qInf = QueryInfo( + query_name="q1", + query_md5="md5", + query_filename="f1", + query_bp="100", + query_n_hashes="10", + ksize="31", + scaled="10", + total_weighted_hashes="200", + ) + cr = ClassificationResult( + rank="phylum", + fraction=0.2, + lineage=RankLineageInfo(lineage_str="a;b"), + f_weighted_at_rank=0.3, + bp_match_at_rank=30, + query_ani_at_rank=0.97, + ) cr.set_status(query_info=qInf, containment_threshold=0.1) - assert cr.status == 'match' + assert cr.status == "match" print(cr.query_ani_at_rank) - assert cr.query_ani_at_rank == approx(0.949, rel=1e-3) + assert cr.query_ani_at_rank == approx(0.949, rel=1e-3) cr.set_status(query_info=qInf, containment_threshold=0.35) - assert cr.status == 'below_threshold' - lD = cr.as_lineage_dict(ranks = RankLineageInfo().ranks, query_info=qInf) + assert cr.status == "below_threshold" + lD = cr.as_lineage_dict(ranks=RankLineageInfo().ranks, query_info=qInf) print(lD) - assert lD == {'ident': 'q1', 'superkingdom': 'a', 'phylum': 'b', 'class': '', 'order': '', - 'family': '', 'genus': '', 'species': '', 'strain': ''} + assert lD == { + "ident": "q1", + "superkingdom": "a", + "phylum": "b", + "class": "", + "order": "", + "family": "", + "genus": "", + "species": "", + "strain": "", + } def test_ClassificationResult_greater_than_1(): "basic functionality of SummarizedGatherResult dataclass" # fraction > 1 with pytest.raises(ValueError) as exc: - ClassificationResult(rank="phylum", fraction=0.3, lineage=RankLineageInfo(lineage_str="a;b"), - f_weighted_at_rank=1.2, bp_match_at_rank=30) + ClassificationResult( + rank="phylum", + fraction=0.3, + lineage=RankLineageInfo(lineage_str="a;b"), + f_weighted_at_rank=1.2, + bp_match_at_rank=30, + ) print(str(exc)) assert "> 100% of the query!" in str(exc) # f_weighted > 1 with pytest.raises(ValueError) as exc: - ClassificationResult(rank="phylum", fraction=1.2, lineage=RankLineageInfo(lineage_str="a;b"), - f_weighted_at_rank=0.3, bp_match_at_rank=30) + ClassificationResult( + rank="phylum", + fraction=1.2, + lineage=RankLineageInfo(lineage_str="a;b"), + f_weighted_at_rank=0.3, + bp_match_at_rank=30, + ) print(str(exc)) assert "> 100% of the query!" 
in str(exc) def test_ClassificationResult_0_fraction(): with pytest.raises(ValueError) as exc: - ClassificationResult(rank="phylum", fraction=-.1, lineage=RankLineageInfo(lineage_str="a;b"), - f_weighted_at_rank=0.3, bp_match_at_rank=30) + ClassificationResult( + rank="phylum", + fraction=-0.1, + lineage=RankLineageInfo(lineage_str="a;b"), + f_weighted_at_rank=0.3, + bp_match_at_rank=30, + ) err_msg = "Summarized fraction is <=0% of the query! This should not occur." assert err_msg in str(exc) - #assert cr.status == 'nomatch' - + # assert cr.status == 'nomatch' + with pytest.raises(ValueError) as exc: - ClassificationResult(rank="phylum", fraction=.1, lineage=RankLineageInfo(lineage_str="a;b"), - f_weighted_at_rank=0, bp_match_at_rank=30) + ClassificationResult( + rank="phylum", + fraction=0.1, + lineage=RankLineageInfo(lineage_str="a;b"), + f_weighted_at_rank=0, + bp_match_at_rank=30, + ) print(str(exc)) assert err_msg in str(exc) def test_ClassificationResult_build_krona_result(): - qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp='100', - query_n_hashes='10',ksize='31',scaled='10', total_weighted_hashes='200') - cr = ClassificationResult(rank="phylum", fraction=0.2, lineage=RankLineageInfo(lineage_str="a;b"), - f_weighted_at_rank=0.3, bp_match_at_rank=30, query_ani_at_rank=0.97) - #cr.set_status(query_info=qInf, rank='phylum') - kr, ukr = cr.build_krona_result(rank='phylum') + QueryInfo( + query_name="q1", + query_md5="md5", + query_filename="f1", + query_bp="100", + query_n_hashes="10", + ksize="31", + scaled="10", + total_weighted_hashes="200", + ) + cr = ClassificationResult( + rank="phylum", + fraction=0.2, + lineage=RankLineageInfo(lineage_str="a;b"), + f_weighted_at_rank=0.3, + bp_match_at_rank=30, + query_ani_at_rank=0.97, + ) + # cr.set_status(query_info=qInf, rank='phylum') + kr, ukr = cr.build_krona_result(rank="phylum") print(kr) - assert kr == (0.2, 'a', 'b') + assert kr == (0.2, "a", "b") print(ukr) - assert ukr == (0.8, 'unclassified', 'unclassified') + assert ukr == (0.8, "unclassified", "unclassified") def test_ClassificationResult_build_krona_result_no_rank(): - qInf = QueryInfo(query_name='q1', query_md5='md5', query_filename='f1',query_bp='100', - query_n_hashes='10',ksize='31',scaled='10', total_weighted_hashes='200') - cr = ClassificationResult(rank="phylum", fraction=0.2, lineage=RankLineageInfo(lineage_str="a;b"), - f_weighted_at_rank=0.3, bp_match_at_rank=30, query_ani_at_rank=0.97) + qInf = QueryInfo( + query_name="q1", + query_md5="md5", + query_filename="f1", + query_bp="100", + query_n_hashes="10", + ksize="31", + scaled="10", + total_weighted_hashes="200", + ) + cr = ClassificationResult( + rank="phylum", + fraction=0.2, + lineage=RankLineageInfo(lineage_str="a;b"), + f_weighted_at_rank=0.3, + bp_match_at_rank=30, + query_ani_at_rank=0.97, + ) cr.set_status(query_info=qInf, containment_threshold=0.1) @@ -407,7 +802,7 @@ def test_GatherRow_old_gather(): # gather does not contain query_name column gA = {"name": "gA.1 name"} with pytest.raises(TypeError) as exc: - make_GatherRow(gA, exclude_cols=['query_bp']) + make_GatherRow(gA, exclude_cols=["query_bp"]) print(str(exc)) assert "__init__() missing 1 required positional argument: 'query_bp'" in str(exc) @@ -433,7 +828,12 @@ def test_AnnotateTaxResult_get_ident_default(): def test_AnnotateTaxResult_get_ident_idcol(): - gA = {"name": "n1", "match_name": "n2", "ident": "n3", "accession": "n4"} # gather result with match name as GCF_001881345.1 + gA = { + "name": "n1", + "match_name": 
"n2", + "ident": "n3", + "accession": "n4", + } # gather result with match name as GCF_001881345.1 taxres = AnnotateTaxResult(raw=gA) print(taxres.match_ident) assert taxres.match_ident == "n1" @@ -449,7 +849,12 @@ def test_AnnotateTaxResult_get_ident_idcol(): def test_AnnotateTaxResult_get_ident_idcol_fail(): - gA = {"name": "n1", "match_name": "n2", "ident": "n3", "accession": "n4"} # gather result with match name as GCF_001881345.1 + gA = { + "name": "n1", + "match_name": "n2", + "ident": "n3", + "accession": "n4", + } # gather result with match name as GCF_001881345.1 with pytest.raises(ValueError) as exc: AnnotateTaxResult(raw=gA, id_col="NotACol") print(str(exc)) @@ -467,7 +872,7 @@ def test_TaxResult_get_ident_split_but_keep_version(): taxres = make_TaxResult(gA, keep_ident_version=True) print("raw ident: ", taxres.raw.name) print("keep_full?: ", taxres.keep_full_identifiers) - print("keep_version?: ",taxres.keep_identifier_versions) + print("keep_version?: ", taxres.keep_identifier_versions) print("final ident: ", taxres.match_ident) assert taxres.match_ident == "GCF_001881345.1" @@ -475,9 +880,9 @@ def test_TaxResult_get_ident_split_but_keep_version(): def test_AnnotateTaxResult_get_ident_split_but_keep_version(): gA = {"name": "GCF_001881345.1 secondname"} taxres = AnnotateTaxResult(gA, keep_identifier_versions=True) - print("raw ident: ", taxres.raw['name']) + print("raw ident: ", taxres.raw["name"]) print("keep_full?: ", taxres.keep_full_identifiers) - print("keep_version?: ",taxres.keep_identifier_versions) + print("keep_version?: ", taxres.keep_identifier_versions) print("final ident: ", taxres.match_ident) assert taxres.match_ident == "GCF_001881345.1" @@ -493,7 +898,7 @@ def test_TaxResult_get_ident_keep_full(): taxres = make_TaxResult(gA, keep_full_ident=True) print("raw ident: ", taxres.raw.name) print("keep_full?: ", taxres.keep_full_identifiers) - print("keep_version?: ",taxres.keep_identifier_versions) + print("keep_version?: ", taxres.keep_identifier_versions) print("final ident: ", taxres.match_ident) assert taxres.match_ident == "GCF_001881345.1 secondname" @@ -501,32 +906,32 @@ def test_TaxResult_get_ident_keep_full(): def test_AnnotateTaxResult_get_ident_keep_full(): gA = {"name": "GCF_001881345.1 secondname"} taxres = AnnotateTaxResult(gA, keep_full_identifiers=True) - print("raw ident: ", taxres.raw['name']) + print("raw ident: ", taxres.raw["name"]) print("keep_full?: ", taxres.keep_full_identifiers) - print("keep_version?: ",taxres.keep_identifier_versions) + print("keep_version?: ", taxres.keep_identifier_versions) print("final ident: ", taxres.match_ident) assert taxres.match_ident == "GCF_001881345.1 secondname" def test_collect_gather_csvs(runtmp): - g_csv = utils.get_test_data('tax/test1.gather.csv') + g_csv = utils.get_test_data("tax/test1.gather.csv") from_file = runtmp.output("tmp-from-file.txt") - with open(from_file, 'w') as fp: + with open(from_file, "w") as fp: fp.write(f"{g_csv}\n") gather_files = collect_gather_csvs([g_csv], from_file=from_file) print("gather_files: ", gather_files) assert len(gather_files) == 1 - assert basename(gather_files[0]) == 'test1.gather.csv' + assert basename(gather_files[0]) == "test1.gather.csv" def test_check_and_load_gather_csvs_empty(runtmp): - g_res = runtmp.output('empty.gather.csv') - with open(g_res, 'w') as fp: + g_res = runtmp.output("empty.gather.csv") + with open(g_res, "w") as fp: fp.write("") csvs = [g_res] # load taxonomy csv - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') + taxonomy_csv = 
utils.get_test_data("tax/test.taxonomy.csv") tax_assign = MultiLineageDB.load([taxonomy_csv], keep_full_identifiers=1) print(tax_assign) @@ -537,24 +942,27 @@ def test_check_and_load_gather_csvs_empty(runtmp): def test_check_and_load_gather_csvs_with_empty_force(runtmp): - g_csv = utils.get_test_data('tax/test1.gather.csv') + g_csv = utils.get_test_data("tax/test1.gather.csv") # make gather results with taxonomy name not in tax_assign - g_res2 = runtmp.output('gA.gather.csv') - g_results = [x.replace("GCF_001881345.1", "gA") + "\n" for x in Path(g_csv).read_text().splitlines()] - with open(g_res2, 'w') as fp: + g_res2 = runtmp.output("gA.gather.csv") + g_results = [ + x.replace("GCF_001881345.1", "gA") + "\n" + for x in Path(g_csv).read_text().splitlines() + ] + with open(g_res2, "w") as fp: fp.writelines(g_results) # make empty gather results - g_res3 = runtmp.output('empty.gather.csv') - with open(g_res3, 'w') as fp: + g_res3 = runtmp.output("empty.gather.csv") + with open(g_res3, "w") as fp: fp.write("") csvs = [g_res2, g_res3] # load taxonomy csv - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - tax_assign = MultiLineageDB.load([taxonomy_csv], - keep_full_identifiers=False, - keep_identifier_versions=False) + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + tax_assign = MultiLineageDB.load( + [taxonomy_csv], keep_full_identifiers=False, keep_identifier_versions=False + ) print(tax_assign) # check gather results and missing ids gather_results = check_and_load_gather_csvs(csvs, tax_assign, force=True) @@ -562,214 +970,269 @@ def test_check_and_load_gather_csvs_with_empty_force(runtmp): q_res = gather_results[0] assert len(q_res.raw_taxresults) == 4 assert q_res.n_missed == 1 - assert 'gA' in q_res.missed_idents + assert "gA" in q_res.missed_idents assert q_res.n_skipped == 0 def test_check_and_load_gather_lineage_csvs_empty(runtmp): # try loading an empty annotated gather file - g_res = runtmp.output('empty.gather-tax.csv') - with open(g_res, 'w') as fp: + g_res = runtmp.output("empty.gather-tax.csv") + with open(g_res, "w") as fp: fp.write("") with pytest.raises(ValueError) as exc: - tax_assign = LineageDB.load_from_gather_with_lineages(g_res) + LineageDB.load_from_gather_with_lineages(g_res) assert "cannot read taxonomy assignments" in str(exc.value) def test_check_and_load_gather_lineage_csvs_bad_header(runtmp): # test on file with wrong headers - g_res = runtmp.output('empty.gather-tax.csv') - with open(g_res, 'w', newline="") as fp: + g_res = runtmp.output("empty.gather-tax.csv") + with open(g_res, "w", newline="") as fp: fp.write("x,y,z") with pytest.raises(ValueError) as exc: - tax_assign = LineageDB.load_from_gather_with_lineages(g_res) - assert "Expected headers 'name' and 'lineage' not found. Is this a with-lineages file?" in str(exc.value) + LineageDB.load_from_gather_with_lineages(g_res) + assert ( + "Expected headers 'name' and 'lineage' not found. Is this a with-lineages file?" 
+        in str(exc.value)
+    )
 
 
 def test_check_and_load_gather_lineage_csvs_dne(runtmp):
     # test loading with-lineage file that does not exist
-    g_res = runtmp.output('empty.gather-tax.csv')
+    g_res = runtmp.output("empty.gather-tax.csv")
 
     with pytest.raises(ValueError) as exc:
-        tax_assign = LineageDB.load_from_gather_with_lineages(g_res)
+        LineageDB.load_from_gather_with_lineages(g_res)
     assert "does not exist" in str(exc.value)
 
 
 def test_check_and_load_gather_lineage_csvs_isdir(runtmp):
     # test loading a with-lineage file that is actually a directory
-    g_res = runtmp.output('empty.gather-tax.csv')
+    g_res = runtmp.output("empty.gather-tax.csv")
     os.mkdir(g_res)
 
     with pytest.raises(ValueError) as exc:
-        tax_assign = LineageDB.load_from_gather_with_lineages(g_res)
+        LineageDB.load_from_gather_with_lineages(g_res)
     assert "is a directory" in str(exc.value)
 
 
 def test_check_and_load_gather_csvs_fail_on_missing(runtmp):
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
     # make gather results with taxonomy name not in tax_assign
-    g_res2 = runtmp.output('gA.gather.csv')
-    g_results = [x.replace("GCF_001881345.1", "gA") + "\n" for x in Path(g_csv).read_text().splitlines()]
-    with open(g_res2, 'w') as fp:
+    g_res2 = runtmp.output("gA.gather.csv")
+    g_results = [
+        x.replace("GCF_001881345.1", "gA") + "\n"
+        for x in Path(g_csv).read_text().splitlines()
+    ]
+    with open(g_res2, "w") as fp:
         fp.writelines(g_results)
 
     csvs = [g_res2]
 
     # load taxonomy csv
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     tax_assign = MultiLineageDB.load([taxonomy_csv], keep_full_identifiers=1)
     print(tax_assign)
     # check gather results and missing ids
     with pytest.raises(ValueError) as exc:
-        check_and_load_gather_csvs(csvs, tax_assign, fail_on_missing_taxonomy=True, force=True)
+        check_and_load_gather_csvs(
+            csvs, tax_assign, fail_on_missing_taxonomy=True, force=True
+        )
     assert "Failing, as requested via --fail-on-missing-taxonomy" in str(exc)
 
 
 def test_load_gather_results():
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
-    tax_assign = MultiLineageDB.load([taxonomy_csv],
-                                     keep_full_identifiers=False,
-                                     keep_identifier_versions=False)
-    gather_csv = utils.get_test_data('tax/test1.gather.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
+    tax_assign = MultiLineageDB.load(
+        [taxonomy_csv], keep_full_identifiers=False, keep_identifier_versions=False
+    )
+    gather_csv = utils.get_test_data("tax/test1.gather.csv")
     gather_results, header = load_gather_results(gather_csv, tax_assignments=tax_assign)
     assert len(gather_results) == 1
     for query_name, res in gather_results.items():
-        assert query_name == 'test1'
+        assert query_name == "test1"
         assert len(res.raw_taxresults) == 4
 
 
 def test_load_gather_results_gzipped(runtmp):
-    gather_csv = utils.get_test_data('tax/test1.gather.csv')
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
-    tax_assign = MultiLineageDB.load([taxonomy_csv],
-                                     keep_full_identifiers=False,
-                                     keep_identifier_versions=False)
-    gather_csv = utils.get_test_data('tax/test1.gather.csv')
+    gather_csv = utils.get_test_data("tax/test1.gather.csv")
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
+    tax_assign = MultiLineageDB.load(
+        [taxonomy_csv], keep_full_identifiers=False, keep_identifier_versions=False
+    )
+    gather_csv = utils.get_test_data("tax/test1.gather.csv")
 
     # rewrite gather_csv as gzipped csv
-    gz_gather = runtmp.output('g.csv.gz')
-    with open(gather_csv, 'rb') as f_in, gzip.open(gz_gather, 'wb') as f_out:
+    gz_gather = runtmp.output("g.csv.gz")
+    with open(gather_csv, "rb") as f_in, gzip.open(gz_gather, "wb") as f_out:
         f_out.writelines(f_in)
 
-    #gather_results, header, seen_queries = load_gather_results(gz_gather)
+    # gather_results, header, seen_queries = load_gather_results(gz_gather)
     gather_results, header = load_gather_results(gz_gather, tax_assignments=tax_assign)
     assert len(gather_results) == 1
     for query_name, res in gather_results.items():
-        assert query_name == 'test1'
+        assert query_name == "test1"
         assert len(res.raw_taxresults) == 4
 
 
 def test_load_gather_results_bad_header(runtmp):
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
-    tax_assign = MultiLineageDB.load([taxonomy_csv],
-                                     keep_full_identifiers=False,
-                                     keep_identifier_versions=False)
-    g_csv = utils.get_test_data('tax/test1.gather.csv')
-
-    bad_g_csv = runtmp.output('g.csv')
-
-    #creates bad gather result
-    bad_g = [x.replace("f_unique_to_query", "nope") + "\n" for x in Path(g_csv).read_text().splitlines()]
-    with open(bad_g_csv, 'w') as fp:
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
+    tax_assign = MultiLineageDB.load(
+        [taxonomy_csv], keep_full_identifiers=False, keep_identifier_versions=False
+    )
+    g_csv = utils.get_test_data("tax/test1.gather.csv")
+
+    bad_g_csv = runtmp.output("g.csv")
+
+    # creates bad gather result
+    bad_g = [
+        x.replace("f_unique_to_query", "nope") + "\n"
+        for x in Path(g_csv).read_text().splitlines()
+    ]
+    with open(bad_g_csv, "w") as fp:
         fp.writelines(bad_g)
     print("bad_gather_results: \n", bad_g)
 
     with pytest.raises(ValueError) as exc:
-        gather_results, header = load_gather_results(bad_g_csv, tax_assignments=tax_assign)
-    assert f"'{bad_g_csv}' is missing columns needed for taxonomic summarization" in str(exc.value)
+        gather_results, header = load_gather_results(
+            bad_g_csv, tax_assignments=tax_assign
+        )
+    assert (
+        f"'{bad_g_csv}' is missing columns needed for taxonomic summarization"
+        in str(exc.value)
+    )
 
 
 def test_load_gather_results_empty(runtmp):
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
-    tax_assign = MultiLineageDB.load([taxonomy_csv],
-                                     keep_full_identifiers=False,
-                                     keep_identifier_versions=False)
-    empty_csv = runtmp.output('g.csv')
-
-    #creates empty gather result
-    with open(empty_csv, 'w') as fp:
-        fp.write('')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
+    tax_assign = MultiLineageDB.load(
+        [taxonomy_csv], keep_full_identifiers=False, keep_identifier_versions=False
+    )
+    empty_csv = runtmp.output("g.csv")
+
+    # creates empty gather result
+    with open(empty_csv, "w") as fp:
        fp.write("")
 
     with pytest.raises(ValueError) as exc:
-        gather_results, header = load_gather_results(empty_csv, tax_assignments=tax_assign)
-    assert f"Cannot read gather results from '{empty_csv}'. Is file empty?" in str(exc.value)
+        gather_results, header = load_gather_results(
+            empty_csv, tax_assignments=tax_assign
+        )
+    assert f"Cannot read gather results from '{empty_csv}'. Is file empty?" in str(
+        exc.value
+    )
 
 
 def test_load_taxonomy_csv():
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
     tax_assign = MultiLineageDB.load([taxonomy_csv])
     print("taxonomy assignments: \n", tax_assign)
-    assert list(tax_assign.keys()) == ['GCF_001881345.1', 'GCF_009494285.1', 'GCF_013368705.1', 'GCF_003471795.1', 'GCF_000017325.1', 'GCF_000021665.1']
-    assert len(tax_assign) == 6 # should have read 6 rows
+    assert list(tax_assign.keys()) == [
+        "GCF_001881345.1",
+        "GCF_009494285.1",
+        "GCF_013368705.1",
+        "GCF_003471795.1",
+        "GCF_000017325.1",
+        "GCF_000021665.1",
+    ]
+    assert len(tax_assign) == 6  # should have read 6 rows
 
 
 def test_load_taxonomy_csv_LIN():
-    taxonomy_csv = utils.get_test_data('tax/test.LIN-taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.LIN-taxonomy.csv")
     tax_assign = MultiLineageDB.load([taxonomy_csv], lins=True)
     print("taxonomy assignments: \n", tax_assign)
-    assert list(tax_assign.keys()) == ['GCF_001881345.1', 'GCF_009494285.1', 'GCF_013368705.1', 'GCF_003471795.1', 'GCF_000017325.1', 'GCF_000021665.1']
-    #assert list(tax_assign.keys()) == ["GCF_000010525.1", "GCF_000007365.1", "GCF_000007725.1", "GCF_000009605.1", "GCF_000021065.1", "GCF_000021085.1"]
-    assert len(tax_assign) == 6 # should have read 6 rows
+    assert list(tax_assign.keys()) == [
+        "GCF_001881345.1",
+        "GCF_009494285.1",
+        "GCF_013368705.1",
+        "GCF_003471795.1",
+        "GCF_000017325.1",
+        "GCF_000021665.1",
+    ]
+    # assert list(tax_assign.keys()) == ["GCF_000010525.1", "GCF_000007365.1", "GCF_000007725.1", "GCF_000009605.1", "GCF_000021065.1", "GCF_000021085.1"]
+    assert len(tax_assign) == 6  # should have read 6 rows
     print(tax_assign.available_ranks)
-    assert tax_assign.available_ranks == {str(x) for x in range(0,20)}
+    assert tax_assign.available_ranks == {str(x) for x in range(0, 20)}
 
 
 def test_load_taxonomy_csv_LIN_fail():
-    taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv')
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
 
     with pytest.raises(ValueError) as exc:
         MultiLineageDB.load([taxonomy_csv], lins=True)
-    assert f"'lin' column not found: cannot read LIN taxonomy assignments from {taxonomy_csv}." in str(exc.value)
+    assert (
+        f"'lin' column not found: cannot read LIN taxonomy assignments from {taxonomy_csv}."
+        in str(exc.value)
+    )
 
 
 def test_load_taxonomy_csv_LIN_mismatch_in_taxfile(runtmp):
-    taxonomy_csv = utils.get_test_data('tax/test.LIN-taxonomy.csv')
-    mimatchLIN_csv = runtmp.output('mmLIN-taxonomy.csv')
-    with open(mimatchLIN_csv, 'w') as mm:
-        tax21=[]
+    taxonomy_csv = utils.get_test_data("tax/test.LIN-taxonomy.csv")
+    mimatchLIN_csv = runtmp.output("mmLIN-taxonomy.csv")
+    with open(mimatchLIN_csv, "w") as mm:
+        tax21 = []
         tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()]
         for n, taxline in enumerate(tax):
-            if n == 2: # add ;0 to a LIN
-                taxlist = taxline.split(',')
-                taxlist[1] += ';0' # add 21st position to LIN
+            if n == 2:  # add ;0 to a LIN
+                taxlist = taxline.split(",")
+                taxlist[1] += ";0"  # add 21st position to LIN
                 tax21.append(",".join(taxlist))
             else:
                 tax21.append(taxline)
         mm.write("\n".join(tax21))
     with pytest.raises(ValueError) as exc:
         MultiLineageDB.load([mimatchLIN_csv], lins=True)
-    assert "For taxonomic summarization, all LIN assignments must use the same number of LIN positions." in str(exc.value)
+    assert (
+        "For taxonomic summarization, all LIN assignments must use the same number of LIN positions."
+ in str(exc.value) + ) def test_load_taxonomy_csv_gzip(runtmp): # test loading a gzipped taxonomy csv file - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - tax_gz = runtmp.output('tax.csv.gz') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + tax_gz = runtmp.output("tax.csv.gz") - with gzip.open(tax_gz, 'wt') as outfp: - with open(taxonomy_csv, 'rt') as infp: + with gzip.open(tax_gz, "wt") as outfp: + with open(taxonomy_csv) as infp: data = infp.read() outfp.write(data) tax_assign = MultiLineageDB.load([tax_gz]) print("taxonomy assignments: \n", tax_assign) - assert list(tax_assign.keys()) == ['GCF_001881345.1', 'GCF_009494285.1', 'GCF_013368705.1', 'GCF_003471795.1', 'GCF_000017325.1', 'GCF_000021665.1'] - assert len(tax_assign) == 6 # should have read 6 rows + assert list(tax_assign.keys()) == [ + "GCF_001881345.1", + "GCF_009494285.1", + "GCF_013368705.1", + "GCF_003471795.1", + "GCF_000017325.1", + "GCF_000021665.1", + ] + assert len(tax_assign) == 6 # should have read 6 rows def test_load_taxonomy_csv_split_id(): - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - tax_assign = MultiLineageDB.load([taxonomy_csv], keep_full_identifiers=0, - keep_identifier_versions=False) + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + tax_assign = MultiLineageDB.load( + [taxonomy_csv], keep_full_identifiers=0, keep_identifier_versions=False + ) print("taxonomy assignments: \n", tax_assign) - assert list(tax_assign.keys()) == ['GCF_001881345', 'GCF_009494285', 'GCF_013368705', 'GCF_003471795', 'GCF_000017325', 'GCF_000021665'] - assert len(tax_assign) == 6 # should have read 6 rows + assert list(tax_assign.keys()) == [ + "GCF_001881345", + "GCF_009494285", + "GCF_013368705", + "GCF_003471795", + "GCF_000017325", + "GCF_000021665", + ] + assert len(tax_assign) == 6 # should have read 6 rows def test_load_taxonomy_csv_with_ncbi_id(runtmp): - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") upd_csv = runtmp.output("updated_taxonomy.csv") - with open(upd_csv, 'w') as new_tax: + with open(upd_csv, "w") as new_tax: tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()] ncbi_id = "ncbi_id after_space" fake_lin = [ncbi_id] + ["sk", "phy", "cls", "ord", "fam", "gen", "sp"] @@ -779,14 +1242,22 @@ def test_load_taxonomy_csv_with_ncbi_id(runtmp): tax_assign = MultiLineageDB.load([upd_csv], keep_full_identifiers=True) print("taxonomy assignments: \n", tax_assign) - assert list(tax_assign.keys()) == ['GCF_001881345.1', 'GCF_009494285.1', 'GCF_013368705.1', 'GCF_003471795.1', 'GCF_000017325.1', 'GCF_000021665.1', "ncbi_id after_space"] + assert list(tax_assign.keys()) == [ + "GCF_001881345.1", + "GCF_009494285.1", + "GCF_013368705.1", + "GCF_003471795.1", + "GCF_000017325.1", + "GCF_000021665.1", + "ncbi_id after_space", + ] assert len(tax_assign) == 7 # should have read 7 rows def test_load_taxonomy_csv_split_id_ncbi(runtmp): - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") upd_csv = runtmp.output("updated_taxonomy.csv") - with open(upd_csv, 'w') as new_tax: + with open(upd_csv, "w") as new_tax: tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()] ncbi_id = "ncbi_id after_space" fake_lin = [ncbi_id] + ["sk", "phy", "cls", "ord", "fam", "gen", "sp"] @@ -794,24 +1265,34 @@ def test_load_taxonomy_csv_split_id_ncbi(runtmp): tax.append(ncbi_tax) new_tax.write("\n".join(tax)) - 
tax_assign = MultiLineageDB.load([upd_csv], keep_full_identifiers=False, - keep_identifier_versions=False) + tax_assign = MultiLineageDB.load( + [upd_csv], keep_full_identifiers=False, keep_identifier_versions=False + ) print("taxonomy assignments: \n", tax_assign) - assert list(tax_assign.keys()) == ['GCF_001881345', 'GCF_009494285', 'GCF_013368705', 'GCF_003471795', 'GCF_000017325', 'GCF_000021665', "ncbi_id"] - assert len(tax_assign) == 7 # should have read 7 rows + assert list(tax_assign.keys()) == [ + "GCF_001881345", + "GCF_009494285", + "GCF_013368705", + "GCF_003471795", + "GCF_000017325", + "GCF_000021665", + "ncbi_id", + ] + assert len(tax_assign) == 7 # should have read 7 rows # check for non-sensical args. with pytest.raises(ValueError): - tax_assign = MultiLineageDB.load([upd_csv], keep_full_identifiers=1, - keep_identifier_versions=False) + tax_assign = MultiLineageDB.load( + [upd_csv], keep_full_identifiers=1, keep_identifier_versions=False + ) def test_load_taxonomy_csv_duplicate(runtmp): - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") duplicated_csv = runtmp.output("duplicated_taxonomy.csv") - with open(duplicated_csv, 'w') as dup: + with open(duplicated_csv, "w") as dup: tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()] - tax.append(tax[1] + 'FOO') # add first tax_assign again + tax.append(tax[1] + "FOO") # add first tax_assign again print(tax[-1]) dup.write("\n".join(tax)) @@ -823,73 +1304,132 @@ def test_load_taxonomy_csv_duplicate(runtmp): def test_load_taxonomy_csv_duplicate_force(runtmp): - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") duplicated_csv = runtmp.output("duplicated_taxonomy.csv") - with open(duplicated_csv, 'w') as dup: + with open(duplicated_csv, "w") as dup: tax = [x.rstrip() for x in Path(taxonomy_csv).read_text().splitlines()] - tax.append(tax[1]) # add first tax_assign again + tax.append(tax[1]) # add first tax_assign again dup.write("\n".join(tax)) # now force tax_assign = MultiLineageDB.load([duplicated_csv], force=True) print("taxonomy assignments: \n", tax_assign) - assert list(tax_assign.keys()) == ['GCF_001881345.1', 'GCF_009494285.1', 'GCF_013368705.1', 'GCF_003471795.1', 'GCF_000017325.1', 'GCF_000021665.1'] + assert list(tax_assign.keys()) == [ + "GCF_001881345.1", + "GCF_009494285.1", + "GCF_013368705.1", + "GCF_003471795.1", + "GCF_000017325.1", + "GCF_000021665.1", + ] def test_format_for_krona_summarization(): """test format for krona""" # make gather results - # make mini taxonomy + # make mini taxonomy gA_tax = ("gA", "a;b") gB_tax = ("gB", "a;c") - taxD = make_mini_taxonomy([gA_tax,gB_tax]) + taxD = make_mini_taxonomy([gA_tax, gB_tax]) - gather_results = [{'query_name': 'queryA', 'name': 'gA', 'f_unique_weighted': 0.2,'f_unique_to_query': 0.2,'unique_intersect_bp': 50}, - {'query_name': 'queryA', "name": 'gB', 'f_unique_weighted': 0.3,'f_unique_to_query': 0.3,'unique_intersect_bp': 30}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, summarize=True, single_query=True) - kres, header = format_for_krona([q_res], 'superkingdom') - assert header == ['fraction', 'superkingdom'] + gather_results = [ + { + "query_name": "queryA", + "name": "gA", + "f_unique_weighted": 0.2, + "f_unique_to_query": 0.2, + "unique_intersect_bp": 50, + }, + { + "query_name": "queryA", + "name": "gB", + "f_unique_weighted": 0.3, + "f_unique_to_query": 0.3, + 
"unique_intersect_bp": 30, + }, + ] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, summarize=True, single_query=True + ) + kres, header = format_for_krona([q_res], "superkingdom") + assert header == ["fraction", "superkingdom"] print("krona_res: ", kres) - assert kres == [(0.5, 'a'), (0.5, 'unclassified')] - kres, header = format_for_krona([q_res], 'phylum') - assert header == ['fraction', 'superkingdom', 'phylum'] - assert kres == [(0.3, 'a', 'c'), (0.2, 'a', 'b'), (0.5, 'unclassified', 'unclassified')] + assert kres == [(0.5, "a"), (0.5, "unclassified")] + kres, header = format_for_krona([q_res], "phylum") + assert header == ["fraction", "superkingdom", "phylum"] + assert kres == [ + (0.3, "a", "c"), + (0.2, "a", "b"), + (0.5, "unclassified", "unclassified"), + ] def test_format_for_krona_classification(): """test format for krona""" # make gather results - # make mini taxonomy + # make mini taxonomy gA_tax = ("gA", "a;b") gB_tax = ("gB", "a;c") - taxD = make_mini_taxonomy([gA_tax,gB_tax]) + taxD = make_mini_taxonomy([gA_tax, gB_tax]) - gather_results = [{'query_name': 'queryA', 'name': 'gA', 'f_unique_weighted': 0.2,'f_unique_to_query': 0.2,'unique_intersect_bp': 50}, - {'query_name': 'queryA', "name": 'gB', 'f_unique_weighted': 0.3,'f_unique_to_query': 0.3,'unique_intersect_bp': 30}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, classify=True, single_query=True) - kres, header = format_for_krona([q_res], 'superkingdom', classification=True) - assert header == ['fraction', 'superkingdom'] + gather_results = [ + { + "query_name": "queryA", + "name": "gA", + "f_unique_weighted": 0.2, + "f_unique_to_query": 0.2, + "unique_intersect_bp": 50, + }, + { + "query_name": "queryA", + "name": "gB", + "f_unique_weighted": 0.3, + "f_unique_to_query": 0.3, + "unique_intersect_bp": 30, + }, + ] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, classify=True, single_query=True + ) + kres, header = format_for_krona([q_res], "superkingdom", classification=True) + assert header == ["fraction", "superkingdom"] print("krona_res: ", kres) - assert kres == [(0.5, 'a')]#, (0.5, 'unclassified')] - kres, header = format_for_krona([q_res], 'phylum', classification=True) - assert header == ['fraction', 'superkingdom', 'phylum'] - assert kres == [(0.3, 'a', 'c')]#, (0.7, 'unclassified', 'unclassified')] + assert kres == [(0.5, "a")] # , (0.5, 'unclassified')] + kres, header = format_for_krona([q_res], "phylum", classification=True) + assert header == ["fraction", "superkingdom", "phylum"] + assert kres == [(0.3, "a", "c")] # , (0.7, 'unclassified', 'unclassified')] def test_format_for_krona_improper_rank(): """test format for krona""" # make gather results - # make mini taxonomy + # make mini taxonomy gA_tax = ("gA", "a;b") gB_tax = ("gB", "a;c") - taxD = make_mini_taxonomy([gA_tax,gB_tax]) + taxD = make_mini_taxonomy([gA_tax, gB_tax]) - gather_results = [{'query_name': 'queryA', 'name': 'gA', 'f_unique_weighted': 0.2,'f_unique_to_query': 0.2,'unique_intersect_bp': 50}, - {'query_name': 'queryA', "name": 'gB', 'f_unique_weighted': 0.3,'f_unique_to_query': 0.3,'unique_intersect_bp': 30}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, summarize=True, single_query=True) + gather_results = [ + { + "query_name": "queryA", + "name": "gA", + "f_unique_weighted": 0.2, + "f_unique_to_query": 0.2, + "unique_intersect_bp": 50, + }, + { + "query_name": "queryA", + "name": "gB", + "f_unique_weighted": 0.3, + "f_unique_to_query": 0.3, 
+ "unique_intersect_bp": 30, + }, + ] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, summarize=True, single_query=True + ) with pytest.raises(ValueError) as exc: - format_for_krona([q_res], 'NotARank') + format_for_krona([q_res], "NotARank") print(str(exc)) assert "Rank 'NotARank' not present in summarized ranks." in str(exc) @@ -897,33 +1437,57 @@ def test_format_for_krona_improper_rank(): def test_format_for_krona_summarization_two_queries(): """test format for krona with multiple queries (normalize by n_queries)""" # make gather results - # make mini taxonomy + # make mini taxonomy gA_tax = ("gA", "a;b") gB_tax = ("gB", "a;c") - taxD = make_mini_taxonomy([gA_tax,gB_tax]) + taxD = make_mini_taxonomy([gA_tax, gB_tax]) - gather_results = [{'query_name': 'queryA', 'name': 'gA', 'f_unique_weighted': 0.2,'f_unique_to_query': 0.2,'unique_intersect_bp': 50}, - {'query_name': 'queryA', "name": 'gB', 'f_unique_weighted': 0.3,'f_unique_to_query': 0.3,'unique_intersect_bp': 30}, - {'query_name': 'queryB', "name": 'gB', 'f_unique_weighted': 0.5,'f_unique_to_query': 0.5,'unique_intersect_bp': 50}] + gather_results = [ + { + "query_name": "queryA", + "name": "gA", + "f_unique_weighted": 0.2, + "f_unique_to_query": 0.2, + "unique_intersect_bp": 50, + }, + { + "query_name": "queryA", + "name": "gB", + "f_unique_weighted": 0.3, + "f_unique_to_query": 0.3, + "unique_intersect_bp": 30, + }, + { + "query_name": "queryB", + "name": "gB", + "f_unique_weighted": 0.5, + "f_unique_to_query": 0.5, + "unique_intersect_bp": 50, + }, + ] gres = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, summarize=True) - kres, header = format_for_krona(list(gres.values()), 'superkingdom') - assert header == ['fraction', 'superkingdom'] + kres, header = format_for_krona(list(gres.values()), "superkingdom") + assert header == ["fraction", "superkingdom"] print("krona_res: ", kres) - assert kres == [(0.5, 'a'), (0.5, 'unclassified')] - kres, header = format_for_krona(list(gres.values()), 'phylum') - assert header == ['fraction', 'superkingdom', 'phylum'] - assert kres == [(0.4, 'a', 'c'), (0.1, 'a', 'b'), (0.5, 'unclassified', 'unclassified')] + assert kres == [(0.5, "a"), (0.5, "unclassified")] + kres, header = format_for_krona(list(gres.values()), "phylum") + assert header == ["fraction", "superkingdom", "phylum"] + assert kres == [ + (0.4, "a", "c"), + (0.1, "a", "b"), + (0.5, "unclassified", "unclassified"), + ] def test_write_krona(runtmp): """test two matches, equal f_unique_to_query""" - krona_results = [(0.5, 'a', 'b', 'c'), (0.5, 'a', 'b', 'd')] - header = ['fraction', 'superkingdom', 'phylum', 'class'] - outk= runtmp.output("outkrona.tsv") - with open(outk, 'w') as out_fp: + krona_results = [(0.5, "a", "b", "c"), (0.5, "a", "b", "d")] + header = ["fraction", "superkingdom", "phylum", "class"] + outk = runtmp.output("outkrona.tsv") + with open(outk, "w") as out_fp: write_krona(header, krona_results, out_fp) - kr = [x.strip().split('\t') for x in Path(outk).read_text().splitlines()] + kr = [x.strip().split("\t") for x in Path(outk).read_text().splitlines()] print("krona_results_from_file: \n", kr) assert kr[0] == ["fraction", "superkingdom", "phylum", "class"] assert kr[1] == ["0.5", "a", "b", "c"] @@ -931,65 +1495,73 @@ def test_write_krona(runtmp): def test_write_lineage_sample_frac(runtmp): - outfrac = runtmp.output('outfrac.csv') - sample_names = ['sample1', 'sample2'] - sk_linD = {'a': {'sample1': '0.500' ,'sample2': '0.700'}} - with open(outfrac, 'w') as out_fp: + outfrac = 
runtmp.output("outfrac.csv") + sample_names = ["sample1", "sample2"] + sk_linD = {"a": {"sample1": "0.500", "sample2": "0.700"}} + with open(outfrac, "w") as out_fp: write_lineage_sample_frac(sample_names, sk_linD, out_fp) - frac_lines = [x.strip().split('\t') for x in Path(outfrac).read_text().splitlines()] + frac_lines = [x.strip().split("\t") for x in Path(outfrac).read_text().splitlines()] print("csv_lines: ", frac_lines) - assert frac_lines == [['lineage', 'sample1', 'sample2'], ['a', '0.500', '0.700']] + assert frac_lines == [["lineage", "sample1", "sample2"], ["a", "0.500", "0.700"]] - phy_linD = {'a;b': {'sample1': '0.500'}, 'a;c': {'sample2': '0.700'}} - with open(outfrac, 'w') as out_fp: + phy_linD = {"a;b": {"sample1": "0.500"}, "a;c": {"sample2": "0.700"}} + with open(outfrac, "w") as out_fp: write_lineage_sample_frac(sample_names, phy_linD, out_fp) - frac_lines = [x.strip().split('\t') for x in Path(outfrac).read_text().splitlines()] + frac_lines = [x.strip().split("\t") for x in Path(outfrac).read_text().splitlines()] print("csv_lines: ", frac_lines) - assert frac_lines == [['lineage', 'sample1', 'sample2'], ['a;b', '0.500', '0'], ['a;c', '0', '0.700']] + assert frac_lines == [ + ["lineage", "sample1", "sample2"], + ["a;b", "0.500", "0"], + ["a;c", "0", "0.700"], + ] def test_write_lineage_sample_frac_format_lineage(runtmp): - outfrac = runtmp.output('outfrac.csv') - sample_names = ['sample1', 'sample2'] - sk_lineage='a' + outfrac = runtmp.output("outfrac.csv") + sample_names = ["sample1", "sample2"] + sk_lineage = "a" print(sk_lineage) - sk_linD = {sk_lineage: {'sample1': '0.500' ,'sample2': '0.700'}} - with open(outfrac, 'w') as out_fp: + sk_linD = {sk_lineage: {"sample1": "0.500", "sample2": "0.700"}} + with open(outfrac, "w") as out_fp: write_lineage_sample_frac(sample_names, sk_linD, out_fp) - frac_lines = [x.strip().split('\t') for x in Path(outfrac).read_text().splitlines()] + frac_lines = [x.strip().split("\t") for x in Path(outfrac).read_text().splitlines()] print("csv_lines: ", frac_lines) - assert frac_lines == [['lineage', 'sample1', 'sample2'], ['a', '0.500', '0.700']] + assert frac_lines == [["lineage", "sample1", "sample2"], ["a", "0.500", "0.700"]] - phy_lineage='a;b' + phy_lineage = "a;b" print(phy_lineage) - phy2_lineage = 'a;c' + phy2_lineage = "a;c" print(phy2_lineage) - phy_linD = {phy_lineage: {'sample1': '0.500'}, phy2_lineage: {'sample2': '0.700'}} - with open(outfrac, 'w') as out_fp: + phy_linD = {phy_lineage: {"sample1": "0.500"}, phy2_lineage: {"sample2": "0.700"}} + with open(outfrac, "w") as out_fp: write_lineage_sample_frac(sample_names, phy_linD, out_fp) - frac_lines = [x.strip().split('\t') for x in Path(outfrac).read_text().splitlines()] + frac_lines = [x.strip().split("\t") for x in Path(outfrac).read_text().splitlines()] print("csv_lines: ", frac_lines) - assert frac_lines == [['lineage', 'sample1', 'sample2'], ['a;b', '0.500', '0'], ['a;c', '0', '0.700']] + assert frac_lines == [ + ["lineage", "sample1", "sample2"], + ["a;b", "0.500", "0"], + ["a;c", "0", "0.700"], + ] def test_tax_multi_load_files(runtmp): # test loading various good and bad files - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - taxonomy_csv2 = utils.get_test_data('tax/test-strain.taxonomy.csv') - badcsv = utils.get_test_data('tax/47+63_x_gtdb-rs202.gather.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + taxonomy_csv2 = utils.get_test_data("tax/test-strain.taxonomy.csv") + badcsv = utils.get_test_data("tax/47+63_x_gtdb-rs202.gather.csv") 
db = MultiLineageDB.load([taxonomy_csv]) assert len(db) == 6 - assert 'strain' not in db.available_ranks + assert "strain" not in db.available_ranks db = MultiLineageDB.load([taxonomy_csv2]) assert len(db) == 6 - assert 'strain' in db.available_ranks - assert db['GCF_001881345.1'][0].rank == 'superkingdom' + assert "strain" in db.available_ranks + assert db["GCF_001881345.1"][0].rank == "superkingdom" # load a string rather than a list with pytest.raises(TypeError): @@ -1001,75 +1573,83 @@ def test_tax_multi_load_files(runtmp): # load a directory with pytest.raises(ValueError): - MultiLineageDB.load([runtmp.output('')]) + MultiLineageDB.load([runtmp.output("")]) # file does not exist with pytest.raises(ValueError): - MultiLineageDB.load([runtmp.output('no-such-file')]) + MultiLineageDB.load([runtmp.output("no-such-file")]) def test_tax_sql_load_new_file(runtmp): # test loading a newer-format sql file with sourmash_internals table - taxonomy_db = utils.get_test_data('sqlite/test.taxonomy.db') + taxonomy_db = utils.get_test_data("sqlite/test.taxonomy.db") db = MultiLineageDB.load([taxonomy_db]) print(list(db.keys())) assert len(db) == 6 - assert 'strain' not in db.available_ranks - assert db['GCF_001881345'][0].rank == 'superkingdom' + assert "strain" not in db.available_ranks + assert db["GCF_001881345"][0].rank == "superkingdom" def test_tax_multi_load_files_shadowed(runtmp): # test loading various good and bad files - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - taxonomy_csv2 = utils.get_test_data('tax/test-strain.taxonomy.csv') - taxonomy_db = utils.get_test_data('tax/test.taxonomy.db') - - db = MultiLineageDB.load([taxonomy_csv, taxonomy_csv2, taxonomy_db], - keep_full_identifiers=False, - keep_identifier_versions=False) + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + taxonomy_csv2 = utils.get_test_data("tax/test-strain.taxonomy.csv") + taxonomy_db = utils.get_test_data("tax/test.taxonomy.db") + + db = MultiLineageDB.load( + [taxonomy_csv, taxonomy_csv2, taxonomy_db], + keep_full_identifiers=False, + keep_identifier_versions=False, + ) assert len(db.shadowed_identifiers()) == 6 # we should have everything including strain assert set(RankLineageInfo().taxlist) == set(db.available_ranks) - db = MultiLineageDB.load([taxonomy_csv, taxonomy_db], - keep_full_identifiers=False, - keep_identifier_versions=False) + db = MultiLineageDB.load( + [taxonomy_csv, taxonomy_db], + keep_full_identifiers=False, + keep_identifier_versions=False, + ) assert len(db.shadowed_identifiers()) == 6 assert set(RankLineageInfo().taxlist[:-1]) == set(db.available_ranks) def test_tax_multi_save_files(runtmp, keep_identifiers, keep_versions): # test save - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") if keep_identifiers and not keep_versions: with pytest.raises(ValueError): - db = MultiLineageDB.load([taxonomy_csv], - keep_full_identifiers=keep_identifiers, - keep_identifier_versions=keep_versions) + db = MultiLineageDB.load( + [taxonomy_csv], + keep_full_identifiers=keep_identifiers, + keep_identifier_versions=keep_versions, + ) return - db = MultiLineageDB.load([taxonomy_csv], - keep_full_identifiers=keep_identifiers, - keep_identifier_versions=keep_versions) + db = MultiLineageDB.load( + [taxonomy_csv], + keep_full_identifiers=keep_identifiers, + keep_identifier_versions=keep_versions, + ) - out_db = runtmp.output('out.db') - out_csv = runtmp.output('out.csv') - out2_csv = runtmp.output('out2.csv') + out_db 
= runtmp.output("out.db") + out_csv = runtmp.output("out.csv") + out2_csv = runtmp.output("out2.csv") # can't save to fp with sql - with open(out_csv, 'wt') as fp: + with open(out_csv, "w") as fp: with pytest.raises(ValueError): - db.save(fp, 'sql') + db.save(fp, "sql") # these should all work... - with open(out_csv, 'wt') as fp: - db.save(fp, 'csv') + with open(out_csv, "w") as fp: + db.save(fp, "csv") - db.save(out2_csv, 'csv') - db.save(out_db, 'sql') + db.save(out2_csv, "csv") + db.save(out_db, "sql") # ...and be equal db1 = db.load([out_db]) @@ -1078,19 +1658,20 @@ def test_tax_multi_save_files(runtmp, keep_identifiers, keep_versions): def strip_strain(it): for k, v in it: - if v[-1].rank == 'strain': + if v[-1].rank == "strain": v = v[:-1] yield k, v import pprint + db_items = list(strip_strain(db.items())) db1_items = list(strip_strain(db1.items())) db2_items = list(strip_strain(db2.items())) db3_items = list(strip_strain(db3.items())) pprint.pprint(db_items) - print('XXX') + print("XXX") pprint.pprint(list(db1_items)) - print('XXX') + print("XXX") pprint.pprint(list(db2_items)) assert set(db_items) == set(db1_items) @@ -1100,18 +1681,18 @@ def strip_strain(it): def test_lineage_db_csv_load(runtmp): # test LineageDB.load - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') - taxonomy_csv2 = utils.get_test_data('tax/test-strain.taxonomy.csv') - badcsv = utils.get_test_data('tax/47+63_x_gtdb-rs202.gather.csv') - badcsv2 = utils.get_test_data('tax/test-missing-ranks.taxonomy.csv') + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") + taxonomy_csv2 = utils.get_test_data("tax/test-strain.taxonomy.csv") + badcsv = utils.get_test_data("tax/47+63_x_gtdb-rs202.gather.csv") + badcsv2 = utils.get_test_data("tax/test-missing-ranks.taxonomy.csv") db = LineageDB.load(taxonomy_csv) assert len(db) == 6 - assert 'strain' not in db.available_ranks + assert "strain" not in db.available_ranks db = LineageDB.load(taxonomy_csv2) assert len(db) == 6 - assert 'strain' in db.available_ranks + assert "strain" in db.available_ranks # load the wrong kind of csv with pytest.raises(ValueError): @@ -1123,32 +1704,32 @@ def test_lineage_db_csv_load(runtmp): # load a directory with pytest.raises(ValueError): - LineageDB.load(runtmp.output('')) + LineageDB.load(runtmp.output("")) # file does not exist with pytest.raises(ValueError): - LineageDB.load(runtmp.output('no-such-file')) + LineageDB.load(runtmp.output("no-such-file")) # construct a CSV with bad headers - with open(runtmp.output('xxx.csv'), 'w', newline="") as fp: - fp.write('x,y,z\n') + with open(runtmp.output("xxx.csv"), "w", newline="") as fp: + fp.write("x,y,z\n") with pytest.raises(ValueError): - LineageDB.load(runtmp.output('xxx.csv')) + LineageDB.load(runtmp.output("xxx.csv")) def test_lineage_db_sql_load(runtmp): # test LineageDB_sqlite.load - taxonomy_db = utils.get_test_data('tax/test.taxonomy.db') - taxonomy_csv = utils.get_test_data('tax/test.taxonomy.csv') + taxonomy_db = utils.get_test_data("tax/test.taxonomy.db") + taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv") db = LineageDB_Sqlite.load(taxonomy_db) assert bool(db) assert len(db) == 6 db.available_ranks - assert 'strain' not in db.available_ranks - assert db['GCF_001881345'][0].rank == 'superkingdom' + assert "strain" not in db.available_ranks + assert db["GCF_001881345"][0].rank == "superkingdom" with pytest.raises(KeyError): - db['foo'] + db["foo"] # load any kind of CSV with pytest.raises(ValueError): @@ -1156,57 +1737,63 @@ def 
test_lineage_db_sql_load(runtmp): # load a directory with pytest.raises(ValueError): - LineageDB_Sqlite.load(runtmp.output('')) + LineageDB_Sqlite.load(runtmp.output("")) # file does not exist with pytest.raises(ValueError): - LineageDB_Sqlite.load(runtmp.output('no-such-file')) + LineageDB_Sqlite.load(runtmp.output("no-such-file")) def test_LineagePair(): - lin = LineagePair(rank="rank1", name='name1') + lin = LineagePair(rank="rank1", name="name1") print(lin) - assert lin.rank=="rank1" - assert lin.name =="name1" - assert lin.taxid==None + assert lin.rank == "rank1" + assert lin.name == "name1" + assert lin.taxid is None def test_LineagePair_1(): - lin = LineagePair(rank="rank1", name='name1', taxid=1) - assert lin.rank=="rank1" - assert lin.name =="name1" - assert lin.taxid==1 + lin = LineagePair(rank="rank1", name="name1", taxid=1) + assert lin.rank == "rank1" + assert lin.name == "name1" + assert lin.taxid == 1 print(lin) def test_BaseLineageInfo_init_empty(): - ranks=["A", "B", "C"] + ranks = ["A", "B", "C"] taxinf = BaseLineageInfo(ranks=ranks) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage()== ['', '', ''] # this is a bit odd, but it's what preserves empty ranks... + assert taxinf.zip_lineage() == [ + "", + "", + "", + ] # this is a bit odd, but it's what preserves empty ranks... print(taxinf.filled_lineage) assert taxinf.filled_lineage == () - assert taxinf.lowest_lineage_name == None - assert taxinf.lowest_lineage_taxid == None + assert taxinf.lowest_lineage_name is None + assert taxinf.lowest_lineage_taxid is None assert taxinf.filled_ranks == () - assert taxinf.name_at_rank("A") == None - assert taxinf.lowest_rank == None + assert taxinf.name_at_rank("A") is None + assert taxinf.lowest_rank is None assert taxinf.display_lineage() == "" assert taxinf.display_lineage(null_as_unclassified=True) == "unclassified" def test_BaseLineageInfo_init_lineage_str(): x = "a;b;c" - ranks=["A", "B", "C"] + ranks = ["A", "B", "C"] taxinf = BaseLineageInfo(lineage_str=x, ranks=ranks) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage()== ['a', 'b', 'c'] + assert taxinf.zip_lineage() == ["a", "b", "c"] print(taxinf.filled_lineage) - assert taxinf.filled_lineage == (LineagePair(rank='A', name='a', taxid=None), - LineagePair(rank='B', name='b', taxid=None), - LineagePair(rank='C', name='c', taxid=None)) + assert taxinf.filled_lineage == ( + LineagePair(rank="A", name="a", taxid=None), + LineagePair(rank="B", name="b", taxid=None), + LineagePair(rank="C", name="c", taxid=None), + ) assert taxinf.lowest_lineage_name == "c" assert taxinf.lowest_rank == "C" assert taxinf.name_at_rank("A") == "a" @@ -1214,37 +1801,39 @@ def test_BaseLineageInfo_init_lineage_str(): def test_BaseLineageInfo_init_lineage_str_comma_sep(): x = "a,b,c" - ranks=["A", "B", "C"] + ranks = ["A", "B", "C"] taxinf = BaseLineageInfo(lineage_str=x, ranks=ranks) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage()== ['a', 'b', 'c'] + assert taxinf.zip_lineage() == ["a", "b", "c"] print(taxinf.filled_lineage) assert taxinf.lowest_lineage_name == "c" def test_BaseLineageInfo_init_lineage_tups(): - ranks=["A", "B", "C"] - lin_tups = (LineagePair(rank="A", name='a'), LineagePair(rank="C", name='b')) + ranks = ["A", "B", "C"] + lin_tups = (LineagePair(rank="A", name="a"), LineagePair(rank="C", name="b")) taxinf = BaseLineageInfo(lineage=lin_tups, ranks=ranks) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage()== ['a', '', 'b'] + assert 
taxinf.zip_lineage() == ["a", "", "b"] def test_BaseLineageInfo_init_lca_lineage_tups(): - ranks=["A", "B", "C"] - lin_tups = (LineagePair(rank="A", name='a'), LineagePair(rank="C", name='b')) + ranks = ["A", "B", "C"] + lin_tups = (LineagePair(rank="A", name="a"), LineagePair(rank="C", name="b")) taxinf = BaseLineageInfo(lineage=lin_tups, ranks=ranks) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage()== ['a', '', 'b'] + assert taxinf.zip_lineage() == ["a", "", "b"] def test_BaseLineageInfo_init_no_ranks(): x = "a;b;c" - rankD = {"superkingdom": "a", "phylum": "b", "class": "c"} - lin_tups = (LineagePair(rank="rank2", name='name1'), LineagePair(rank="rank1", name='name1')) + lin_tups = ( + LineagePair(rank="rank2", name="name1"), + LineagePair(rank="rank1", name="name1"), + ) with pytest.raises(TypeError) as exc: BaseLineageInfo(lineage_str=x) print(exc) @@ -1256,9 +1845,8 @@ def test_BaseLineageInfo_init_no_ranks(): def test_BaseLineageInfo_init_with_wrong_ranks(): - ranks=["A", "B", "C"] - lin_tups = [LineagePair(rank="rank1", name='name1')] - linD = {"rank1": "a"} + ranks = ["A", "B", "C"] + lin_tups = [LineagePair(rank="rank1", name="name1")] with pytest.raises(ValueError) as exc: BaseLineageInfo(lineage=lin_tups, ranks=ranks) print(str(exc)) @@ -1266,7 +1854,7 @@ def test_BaseLineageInfo_init_with_wrong_ranks(): def test_BaseLineageInfo_init_not_lineagepair(): - ranks=["A", "B", "C"] + ranks = ["A", "B", "C"] lin_tups = (("rank1", "name1"),) with pytest.raises(ValueError) as exc: BaseLineageInfo(lineage=lin_tups, ranks=ranks) @@ -1276,7 +1864,16 @@ def test_BaseLineageInfo_init_not_lineagepair(): def test_RankLineageInfo_taxlist(): taxinf = RankLineageInfo() - taxranks = ('superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'strain') + taxranks = ( + "superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + "strain", + ) assert taxinf.taxlist == taxranks assert taxinf.ascending_taxlist == taxranks[::-1] @@ -1286,14 +1883,14 @@ def test_RankLineageInfo_init_lineage_str(): taxinf = RankLineageInfo(lineage_str=x) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage()== ['a', 'b', 'c', '', '', '', '', ''] + assert taxinf.zip_lineage() == ["a", "b", "c", "", "", "", "", ""] def test_LINLineageInfo_init_empty(): taxinf = LINLineageInfo() assert taxinf.n_lin_positions == 0 - assert taxinf.zip_lineage()== [] - assert taxinf.display_lineage()== "" + assert taxinf.zip_lineage() == [] + assert taxinf.display_lineage() == "" assert taxinf.filled_ranks == () assert taxinf.n_filled_pos == 0 @@ -1304,7 +1901,7 @@ def test_LINLineageInfo_init_n_pos(): print(taxinf.lineage) print(taxinf.lineage_str) assert taxinf.n_lin_positions == 5 - assert taxinf.zip_lineage()== ['', '', '', '', ''] + assert taxinf.zip_lineage() == ["", "", "", "", ""] assert taxinf.filled_ranks == () assert taxinf.n_filled_pos == 0 @@ -1316,8 +1913,8 @@ def test_LINLineageInfo_init_n_pos_and_lineage_str(): print(taxinf.lineage) print(taxinf.lineage_str) assert taxinf.n_lin_positions == 5 - assert taxinf.zip_lineage()== ['0', '0', '1', '', ''] - assert taxinf.filled_ranks == ("0","1","2") + assert taxinf.zip_lineage() == ["0", "0", "1", "", ""] + assert taxinf.filled_ranks == ("0", "1", "2") assert taxinf.n_filled_pos == 3 @@ -1327,7 +1924,10 @@ def test_LINLineageInfo_init_n_pos_and_lineage_str_fail(): with pytest.raises(ValueError) as exc: LINLineageInfo(lineage_str=x, n_lin_positions=n_pos) print(str(exc)) - assert "Provided 
'n_lin_positions' has fewer positions than provided 'lineage_str'." in str(exc) + assert ( + "Provided 'n_lin_positions' has fewer positions than provided 'lineage_str'." + in str(exc) + ) def test_LINLineageInfo_init_lineage_str_only(): @@ -1336,8 +1936,8 @@ def test_LINLineageInfo_init_lineage_str_only(): print(taxinf.lineage) print(taxinf.lineage_str) assert taxinf.n_lin_positions == 3 - assert taxinf.zip_lineage()== ['0', '0', '1'] - assert taxinf.filled_ranks == ("0","1","2") + assert taxinf.zip_lineage() == ["0", "0", "1"] + assert taxinf.filled_ranks == ("0", "1", "2") assert taxinf.n_filled_pos == 3 @@ -1350,12 +1950,15 @@ def test_LINLineageInfo_init_not_lineagepair(): def test_LINLineageInfo_init_lineagepair(): - lin_tups = (LineagePair("rank1", "name1"), LineagePair("rank2", None),) + lin_tups = ( + LineagePair("rank1", "name1"), + LineagePair("rank2", None), + ) taxinf = LINLineageInfo(lineage=lin_tups) print(taxinf.lineage) assert taxinf.n_lin_positions == 2 - assert taxinf.zip_lineage()== ["name1", ""] - assert taxinf.zip_lineage(truncate_empty=True)== ["name1"] + assert taxinf.zip_lineage() == ["name1", ""] + assert taxinf.zip_lineage(truncate_empty=True) == ["name1"] assert taxinf.filled_ranks == ("rank1",) assert taxinf.ranks == ("rank1", "rank2") assert taxinf.n_filled_pos == 1 @@ -1363,7 +1966,7 @@ def test_LINLineageInfo_init_lineagepair(): def test_lca_LINLineageInfo_diff_n_pos(): x = "0;0;1" - y = '0' + y = "0" lin1 = LINLineageInfo(lineage_str=x) lin2 = LINLineageInfo(lineage_str=y) assert lin1.is_compatible(lin2) @@ -1376,30 +1979,30 @@ def test_lca_LINLineageInfo_diff_n_pos(): def test_lca_LINLineageInfo_no_lca(): x = "0;0;1" - y = '12;0;1' + y = "12;0;1" lin1 = LINLineageInfo(lineage_str=x) lin2 = LINLineageInfo(lineage_str=y) assert lin1.is_compatible(lin2) assert lin2.is_compatible(lin1) lca_from_lin1 = lin1.find_lca(lin2) lca_from_lin2 = lin2.find_lca(lin1) - assert lca_from_lin1 == lca_from_lin2 == None + assert lca_from_lin1 == lca_from_lin2 is None def test_lca_RankLineageInfo_no_lca(): x = "a;b;c" - y = 'd;e;f;g' + y = "d;e;f;g" lin1 = RankLineageInfo(lineage_str=x) lin2 = RankLineageInfo(lineage_str=y) assert lin1.is_compatible(lin2) assert lin2.is_compatible(lin1) lca_from_lin1 = lin1.find_lca(lin2) lca_from_lin2 = lin2.find_lca(lin1) - assert lca_from_lin1 == lca_from_lin2 == None + assert lca_from_lin1 == lca_from_lin2 is None def test_incompatibility_LINLineageInfo_RankLineageInfo(): - x="a;b;c" + x = "a;b;c" lin1 = RankLineageInfo(lineage_str=x) lin2 = LINLineageInfo(lineage_str=x) assert not lin1.is_compatible(lin2) @@ -1408,64 +2011,75 @@ def test_incompatibility_LINLineageInfo_RankLineageInfo(): def test_RankLineageInfo_init_lineage_str_with_ranks_as_list(): x = "a;b;c" - taxranks = ['superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'] + taxranks = [ + "superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + ] taxinf = RankLineageInfo(lineage_str=x, ranks=taxranks) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage()== ['a', 'b', 'c', '', '', '', ''] + assert taxinf.zip_lineage() == ["a", "b", "c", "", "", "", ""] def test_RankLineageInfo_init_lineage_tups(): - x = (LineagePair(rank="superkingdom", name='a'), LineagePair(rank="phylum", name='b')) + x = ( + LineagePair(rank="superkingdom", name="a"), + LineagePair(rank="phylum", name="b"), + ) taxinf = RankLineageInfo(lineage=x) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage()== ['a', 'b', '', 
'', '', '', '', ''] + assert taxinf.zip_lineage() == ["a", "b", "", "", "", "", "", ""] def test_RankLineageInfo_init_lineage_dict_fail(): - ranks=["A", "B", "C"] - lin_tups = (LineagePair(rank="A", name='a'), LineagePair(rank="C", name='b')) + ranks = ["A", "B", "C"] + lin_tups = (LineagePair(rank="A", name="a"), LineagePair(rank="C", name="b")) with pytest.raises(ValueError) as exc: - taxinf = RankLineageInfo(ranks=ranks, lineage_dict=lin_tups) + RankLineageInfo(ranks=ranks, lineage_dict=lin_tups) print(str(exc)) assert "is not dictionary" in str(exc) def test_RankLineageInfo_init_lineage_dict(): - x = {'rank1': 'name1', 'rank2': 'name2'} + x = {"rank1": "name1", "rank2": "name2"} taxinf = RankLineageInfo(lineage_dict=x, ranks=["rank1", "rank2"]) print("ranks: ", taxinf.ranks) print("lineage: ", taxinf.lineage) print("zipped lineage: ", taxinf.zip_lineage()) - assert taxinf.zip_lineage()== ['name1', 'name2'] + assert taxinf.zip_lineage() == ["name1", "name2"] def test_RankLineageInfo_init_lineage_dict_default_ranks(): - x = {"superkingdom":'a',"phylum":'b'} + x = {"superkingdom": "a", "phylum": "b"} taxinf = RankLineageInfo(lineage_dict=x) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage()== ['a', 'b', '', '', '', '', '', ''] + assert taxinf.zip_lineage() == ["a", "b", "", "", "", "", "", ""] def test_RankLineageInfo_init_lineage_dict_withtaxpath(): - x = {'rank1': 'name1', 'rank2': 'name2', 'taxpath': "1|2"} + x = {"rank1": "name1", "rank2": "name2", "taxpath": "1|2"} taxinf = RankLineageInfo(lineage_dict=x, ranks=["rank1", "rank2"]) print("ranks: ", taxinf.ranks) print("lineage: ", taxinf.lineage) print("zipped lineage: ", taxinf.zip_lineage()) print("zipped taxids: ", taxinf.zip_taxid()) - assert taxinf.zip_lineage()== ['name1', 'name2'] - assert taxinf.zip_taxid()== ['1', '2'] + assert taxinf.zip_lineage() == ["name1", "name2"] + assert taxinf.zip_taxid() == ["1", "2"] assert taxinf.lowest_lineage_taxid == "2" assert taxinf.lowest_lineage_name == "name2" def test_RankLineageInfo_init_lineage_str_lineage_dict_test_eq(): x = "a;b;c" - ranks=["A", "B", "C"] + ranks = ["A", "B", "C"] rankD = {"A": "a", "B": "b", "C": "c"} lin1 = RankLineageInfo(lineage_str=x, ranks=ranks) lin2 = RankLineageInfo(lineage_dict=rankD, ranks=ranks) @@ -1473,56 +2087,56 @@ def test_RankLineageInfo_init_lineage_str_lineage_dict_test_eq(): def test_RankLineageInfo_init_lineage_dict_missing_rank(): - x = {'superkingdom': 'name1', 'class': 'name2'} + x = {"superkingdom": "name1", "class": "name2"} taxinf = RankLineageInfo(lineage_dict=x) print("ranks: ", taxinf.ranks) print("lineage: ", taxinf.lineage) print("zipped lineage: ", taxinf.zip_lineage()) - assert taxinf.zip_lineage()== ['name1', '', 'name2', '', '', '', '', ''] - assert taxinf.zip_lineage(truncate_empty=True)== ['name1', '', 'name2'] + assert taxinf.zip_lineage() == ["name1", "", "name2", "", "", "", "", ""] + assert taxinf.zip_lineage(truncate_empty=True) == ["name1", "", "name2"] def test_RankLineageInfo_init_lineage_dict_missing_rank_with_taxpath(): - x = {'superkingdom': 'name1', 'class': 'name2', 'taxpath': '1||2'} + x = {"superkingdom": "name1", "class": "name2", "taxpath": "1||2"} taxinf = RankLineageInfo(lineage_dict=x) print("ranks: ", taxinf.ranks) print("lineage: ", taxinf.lineage) print("zipped lineage: ", taxinf.zip_lineage()) - assert taxinf.zip_lineage()== ['name1', '', 'name2', '', '', '', '', ''] - assert taxinf.zip_taxid()== ['1', '', '2', '', '', '', '', ''] + assert taxinf.zip_lineage() == ["name1", "", 
"name2", "", "", "", "", ""] + assert taxinf.zip_taxid() == ["1", "", "2", "", "", "", "", ""] def test_RankLineageInfo_init_lineage_dict_name_taxpath_mismatch(): # If there's no name, we don't report the taxpath, because lineage is not "filled". # Is this desired behavior? - x = {'superkingdom': 'name1', 'taxpath': '1||2'} + x = {"superkingdom": "name1", "taxpath": "1||2"} taxinf = RankLineageInfo(lineage_dict=x) print("ranks: ", taxinf.ranks) print("lineage: ", taxinf.lineage) print("zipped lineage: ", taxinf.zip_lineage()) - assert taxinf.zip_lineage()== ['name1', '', '', '', '', '', '', ''] - assert taxinf.zip_taxid()== ['1', '', '', '', '', '', '', ''] + assert taxinf.zip_lineage() == ["name1", "", "", "", "", "", "", ""] + assert taxinf.zip_taxid() == ["1", "", "", "", "", "", "", ""] def test_RankLineageInfo_init_lineage_dict_name_taxpath_missing_taxids(): # If there's no name, we don't report the taxpath, because lineage is not "filled". # Is this desired behavior? - x = {'superkingdom': 'name1', 'phylum': "name2", "class": "name3", 'taxpath': '|2'} + x = {"superkingdom": "name1", "phylum": "name2", "class": "name3", "taxpath": "|2"} taxinf = RankLineageInfo(lineage_dict=x) print("ranks: ", taxinf.ranks) print("lineage: ", taxinf.lineage) print("zipped lineage: ", taxinf.zip_lineage()) print("zipped taxids: ", taxinf.zip_taxid()) - assert taxinf.zip_lineage()== ['name1', 'name2', 'name3', '', '', '', '', ''] - assert taxinf.zip_taxid()== ['', '2', '', '', '', '', '', ''] + assert taxinf.zip_lineage() == ["name1", "name2", "name3", "", "", "", "", ""] + assert taxinf.zip_taxid() == ["", "2", "", "", "", "", "", ""] def test_RankLineageInfo_init_lineage_dict_taxpath_too_long(): - x = {'superkingdom': 'name1', 'class': 'name2', 'taxpath': '1||2||||||||||'} + x = {"superkingdom": "name1", "class": "name2", "taxpath": "1||2||||||||||"} with pytest.raises(ValueError) as exc: RankLineageInfo(lineage_dict=x) print(str(exc)) - assert f"Number of NCBI taxids (13) exceeds number of ranks (8)" in str(exc) + assert "Number of NCBI taxids (13) exceeds number of ranks (8)" in str(exc) def test_RankLineageInfo_init_lineage_str_lineage_dict_test_eq(): @@ -1540,7 +2154,7 @@ def test_RankLineageInfo_init_lineage_str_1_truncate(): taxinf = RankLineageInfo(lineage_str=x) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage(truncate_empty=True)== ['a', 'b', 'c'] + assert taxinf.zip_lineage(truncate_empty=True) == ["a", "b", "c"] def test_RankLineageInfo_init_lineage_str_2(): @@ -1548,7 +2162,7 @@ def test_RankLineageInfo_init_lineage_str_2(): taxinf = RankLineageInfo(lineage_str=x) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage()== ['a', 'b', '', 'c' '', '', '', '', ''] + assert taxinf.zip_lineage() == ["a", "b", "", "c" "", "", "", "", ""] def test_RankLineageInfo_init_lineage_str_2_truncate(): @@ -1556,72 +2170,92 @@ def test_RankLineageInfo_init_lineage_str_2_truncate(): taxinf = RankLineageInfo(lineage_str=x) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage(truncate_empty=True)== ['a', 'b', '', 'c'] + assert taxinf.zip_lineage(truncate_empty=True) == ["a", "b", "", "c"] def test_RankLineageInfo_init_lineage_with_incorrect_rank(): - x = [ LineagePair('superkingdom', 'a'), LineagePair("NotARank", ''), LineagePair('class', 'c') ] + x = [ + LineagePair("superkingdom", "a"), + LineagePair("NotARank", ""), + LineagePair("class", "c"), + ] with pytest.raises(ValueError) as exc: RankLineageInfo(lineage=x) print(str(exc)) - assert 
f"Rank 'NotARank' not present in " in str(exc) + assert "Rank 'NotARank' not present in " in str(exc) def test_zip_lineage_1(): - x = [ LineagePair('superkingdom', 'a'), LineagePair('phylum', 'b') ] + x = [LineagePair("superkingdom", "a"), LineagePair("phylum", "b")] taxinf = RankLineageInfo(lineage=x) print("ranks: ", taxinf.ranks) print("zipped lineage: ", taxinf.zip_lineage()) - assert taxinf.zip_lineage() == ['a', 'b', '', '', '', '', '', ''] + assert taxinf.zip_lineage() == ["a", "b", "", "", "", "", "", ""] def test_zip_lineage_2(): - x = [ LineagePair('superkingdom', 'a'), LineagePair('phylum', 'b') ] + x = [LineagePair("superkingdom", "a"), LineagePair("phylum", "b")] taxinf = RankLineageInfo(lineage=x) print("ranks: ", taxinf.ranks) print("zipped lineage: ", taxinf.zip_lineage(truncate_empty=True)) - assert taxinf.zip_lineage(truncate_empty=True) == ['a', 'b'] + assert taxinf.zip_lineage(truncate_empty=True) == ["a", "b"] def test_zip_lineage_3(): - x = [ LineagePair('superkingdom', 'a'), LineagePair(None, ''), LineagePair('class', 'c') ] + x = [ + LineagePair("superkingdom", "a"), + LineagePair(None, ""), + LineagePair("class", "c"), + ] taxinf = RankLineageInfo(lineage=x) - assert taxinf.zip_lineage() == ['a', '', 'c', '', '', '', '', ''] + assert taxinf.zip_lineage() == ["a", "", "c", "", "", "", "", ""] def test_zip_lineage_3_truncate(): - x = [ LineagePair('superkingdom', 'a'), LineagePair(None, ''), LineagePair('class', 'c') ] + x = [ + LineagePair("superkingdom", "a"), + LineagePair(None, ""), + LineagePair("class", "c"), + ] taxinf = RankLineageInfo(lineage=x) - assert taxinf.zip_lineage(truncate_empty=True) == ['a', '', 'c'] + assert taxinf.zip_lineage(truncate_empty=True) == ["a", "", "c"] def test_zip_lineage_4(): - x = [ LineagePair('superkingdom', 'a'), LineagePair('class', 'c') ] + x = [LineagePair("superkingdom", "a"), LineagePair("class", "c")] taxinf = RankLineageInfo(lineage=x) - assert taxinf.zip_lineage(truncate_empty=True) == ['a', '', 'c'] + assert taxinf.zip_lineage(truncate_empty=True) == ["a", "", "c"] def test_display_lineage_1(): - x = [ LineagePair('superkingdom', 'a'), LineagePair('phylum', 'b') ] + x = [LineagePair("superkingdom", "a"), LineagePair("phylum", "b")] taxinf = RankLineageInfo(lineage=x) assert taxinf.display_lineage() == "a;b" def test_display_lineage_2(): - x = [ LineagePair('superkingdom', 'a'), LineagePair(None, ''), LineagePair('class', 'c') ] + x = [ + LineagePair("superkingdom", "a"), + LineagePair(None, ""), + LineagePair("class", "c"), + ] taxinf = RankLineageInfo(lineage=x) assert taxinf.display_lineage() == "a;;c" def test_display_taxid_1(): - x = [ LineagePair('superkingdom', 'a', 1), LineagePair('phylum', 'b', 2) ] + x = [LineagePair("superkingdom", "a", 1), LineagePair("phylum", "b", 2)] taxinf = RankLineageInfo(lineage=x) print(taxinf) assert taxinf.display_taxid() == "1;2" def test_display_taxid_2(): - x = [ LineagePair('superkingdom', 'name1', 1), LineagePair(None, ''), LineagePair ('class', 'name2',2) ] + x = [ + LineagePair("superkingdom", "name1", 1), + LineagePair(None, ""), + LineagePair("class", "name2", 2), + ] taxinf = RankLineageInfo(lineage=x) print(taxinf) assert taxinf.display_taxid() == "1;;2" @@ -1629,54 +2263,53 @@ def test_display_taxid_2(): def test_is_lineage_match_1(): # basic behavior: match at order and above, but not at family or below. 
-    lin1 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__e')
-    lin2 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__f')
+    lin1 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__e")
+    lin2 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
     print(lin1.lineage)
     assert lin1.is_compatible(lin2)
-    assert lin1.is_lineage_match(lin2, 'superkingdom')
-    assert lin2.is_lineage_match(lin1, 'superkingdom')
-    assert lin1.is_lineage_match(lin2, 'phylum')
-    assert lin2.is_lineage_match(lin1, 'phylum')
-    assert lin1.is_lineage_match(lin2, 'class')
-    assert lin2.is_lineage_match(lin1, 'class')
-    assert lin1.is_lineage_match(lin2, 'order')
-    assert lin2.is_lineage_match(lin1, 'order')
-
-    assert not lin1.is_lineage_match(lin2, 'family')
-    assert not lin2.is_lineage_match(lin1, 'family')
-    assert not lin1.is_lineage_match(lin2, 'genus')
-    assert not lin2.is_lineage_match(lin1, 'genus')
-    assert not lin1.is_lineage_match(lin2, 'species')
-    assert not lin2.is_lineage_match(lin1, 'species')
+    assert lin1.is_lineage_match(lin2, "superkingdom")
+    assert lin2.is_lineage_match(lin1, "superkingdom")
+    assert lin1.is_lineage_match(lin2, "phylum")
+    assert lin2.is_lineage_match(lin1, "phylum")
+    assert lin1.is_lineage_match(lin2, "class")
+    assert lin2.is_lineage_match(lin1, "class")
+    assert lin1.is_lineage_match(lin2, "order")
+    assert lin2.is_lineage_match(lin1, "order")
+
+    assert not lin1.is_lineage_match(lin2, "family")
+    assert not lin2.is_lineage_match(lin1, "family")
+    assert not lin1.is_lineage_match(lin2, "genus")
+    assert not lin2.is_lineage_match(lin1, "genus")
+    assert not lin1.is_lineage_match(lin2, "species")
+    assert not lin2.is_lineage_match(lin1, "species")

     lca_from_lin1 = lin1.find_lca(lin2)
     print(lca_from_lin1.display_lineage())
     lca_from_lin2 = lin2.find_lca(lin1)
     assert lca_from_lin1 == lca_from_lin2
     assert lca_from_lin1.display_lineage() == "d__a;p__b;c__c;o__d"
-

 def test_is_lineage_match_2():
     # match at family, and above, levels; no genus or species to match
-    lin1 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__f')
-    lin2 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__f')
+    lin1 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
+    lin2 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
     assert lin1.is_compatible(lin2)
-    assert lin1.is_lineage_match(lin2, 'superkingdom')
-    assert lin2.is_lineage_match(lin1, 'superkingdom')
-    assert lin1.is_lineage_match(lin2, 'phylum')
-    assert lin2.is_lineage_match(lin1, 'phylum')
-    assert lin1.is_lineage_match(lin2, 'class')
-    assert lin2.is_lineage_match(lin1, 'class')
-    assert lin1.is_lineage_match(lin2, 'order')
-    assert lin2.is_lineage_match(lin1, 'order')
-    assert lin1.is_lineage_match(lin2, 'family')
-    assert lin2.is_lineage_match(lin1, 'family')
-
-    assert not lin1.is_lineage_match(lin2, 'genus')
-    assert not lin2.is_lineage_match(lin1, 'genus')
-    assert not lin1.is_lineage_match(lin2, 'species')
-    assert not lin2.is_lineage_match(lin1, 'species')
+    assert lin1.is_lineage_match(lin2, "superkingdom")
+    assert lin2.is_lineage_match(lin1, "superkingdom")
+    assert lin1.is_lineage_match(lin2, "phylum")
+    assert lin2.is_lineage_match(lin1, "phylum")
+    assert lin1.is_lineage_match(lin2, "class")
+    assert lin2.is_lineage_match(lin1, "class")
+    assert lin1.is_lineage_match(lin2, "order")
+    assert lin2.is_lineage_match(lin1, "order")
+    assert lin1.is_lineage_match(lin2, "family")
+    assert lin2.is_lineage_match(lin1, "family")
+
+    assert not lin1.is_lineage_match(lin2, "genus")
+    assert not lin2.is_lineage_match(lin1, "genus")
+    assert not lin1.is_lineage_match(lin2, "species")
+    assert not lin2.is_lineage_match(lin1, "species")

     lca_from_lin1 = lin1.find_lca(lin2)
     print(lca_from_lin1.display_lineage())
@@ -1688,70 +2321,79 @@ def test_is_lineage_match_2():

 def test_is_lineage_match_3():
     # one lineage is empty
     lin1 = RankLineageInfo()
-    lin2 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__f')
+    lin2 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
     assert lin1.is_compatible(lin2)
-    assert not lin1.is_lineage_match(lin2, 'superkingdom')
-    assert not lin2.is_lineage_match(lin1, 'superkingdom')
-    assert not lin1.is_lineage_match(lin2, 'phylum')
-    assert not lin2.is_lineage_match(lin1, 'phylum')
-    assert not lin1.is_lineage_match(lin2, 'class')
-    assert not lin2.is_lineage_match(lin1, 'class')
-    assert not lin1.is_lineage_match(lin2, 'order')
-    assert not lin2.is_lineage_match(lin1, 'order')
-    assert not lin1.is_lineage_match(lin2, 'family')
-    assert not lin2.is_lineage_match(lin1, 'family')
-    assert not lin1.is_lineage_match(lin2, 'genus')
-    assert not lin2.is_lineage_match(lin1, 'genus')
-    assert not lin1.is_lineage_match(lin2, 'species')
-    assert not lin2.is_lineage_match(lin1, 'species')
+    assert not lin1.is_lineage_match(lin2, "superkingdom")
+    assert not lin2.is_lineage_match(lin1, "superkingdom")
+    assert not lin1.is_lineage_match(lin2, "phylum")
+    assert not lin2.is_lineage_match(lin1, "phylum")
+    assert not lin1.is_lineage_match(lin2, "class")
+    assert not lin2.is_lineage_match(lin1, "class")
+    assert not lin1.is_lineage_match(lin2, "order")
+    assert not lin2.is_lineage_match(lin1, "order")
+    assert not lin1.is_lineage_match(lin2, "family")
+    assert not lin2.is_lineage_match(lin1, "family")
+    assert not lin1.is_lineage_match(lin2, "genus")
+    assert not lin2.is_lineage_match(lin1, "genus")
+    assert not lin1.is_lineage_match(lin2, "species")
+    assert not lin2.is_lineage_match(lin1, "species")


 def test_is_lineage_match_incorrect_ranks():
-    #test comparison with incompatible ranks
-    taxranks = ('superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'strain')
-    lin1 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__e', ranks=taxranks[::-1])
-    lin2 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__f')
+    # test comparison with incompatible ranks
+    taxranks = (
+        "superkingdom",
+        "phylum",
+        "class",
+        "order",
+        "family",
+        "genus",
+        "species",
+        "strain",
+    )
+    lin1 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__e", ranks=taxranks[::-1])
+    lin2 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
     print(lin1.lineage)
     assert not lin1.is_compatible(lin2)
     with pytest.raises(ValueError) as exc:
-        lin1.is_lineage_match(lin2, 'superkingdom')
+        lin1.is_lineage_match(lin2, "superkingdom")
     print(str(exc))
-    assert 'Cannot compare lineages from taxonomies with different ranks.' in str(exc)
+    assert "Cannot compare lineages from taxonomies with different ranks." in str(exc)


 def test_is_lineage_match_improper_rank():
-    #test comparison with incompatible ranks
-    lin1 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__e')
-    lin2 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__f')
+    # test comparison with incompatible ranks
+    lin1 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__e")
+    lin2 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
     print(lin1.lineage)
     assert lin1.is_compatible(lin2)
     with pytest.raises(ValueError) as exc:
-        lin1.is_lineage_match(lin2, 'NotARank')
+        lin1.is_lineage_match(lin2, "NotARank")
     print(str(exc))
     assert "Desired Rank 'NotARank' not available for this lineage" in str(exc)


 def test_pop_to_rank_1():
     # basic behavior - pop to order?
-    lin1 = RankLineageInfo(lineage_str='d__a;p__b;c__c;o__d')
-    lin2 = RankLineageInfo(lineage_str='d__a;p__b;c__c;o__d;f__f')
+    lin1 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d")
+    lin2 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
     print(lin1)
-    popped = lin2.pop_to_rank('order')
+    popped = lin2.pop_to_rank("order")
     print(popped)
     assert popped == lin1


 def test_pop_to_rank_2():
     # what if we're already above rank?
-    lin2 = RankLineageInfo(lineage_str='d__a;p__b;c__c;o__d;f__f')
-    print(lin2.pop_to_rank('species'))
-    assert lin2.pop_to_rank('species') == lin2
+    lin2 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
+    print(lin2.pop_to_rank("species"))
+    assert lin2.pop_to_rank("species") == lin2


 def test_pop_to_rank_rank_not_avail():
-    lin1 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__f')
+    lin1 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
     with pytest.raises(ValueError) as exc:
         lin1.pop_to_rank("NotARank")
     print(str(exc))
@@ -1759,15 +2401,17 @@ def test_pop_to_rank_rank_not_avail():

 def test_lineage_at_rank_norank():
-    lin1 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__f')
+    lin1 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
     with pytest.raises(TypeError) as exc:
         lin1.lineage_at_rank()
     print(str(exc))
-    assert "lineage_at_rank() missing 1 required positional argument: 'rank'" in str(exc)
+    assert "lineage_at_rank() missing 1 required positional argument: 'rank'" in str(
+        exc
+    )


 def test_lineage_at_rank_rank_not_avail():
-    lin1 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__f')
+    lin1 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
     with pytest.raises(ValueError) as exc:
         lin1.lineage_at_rank("NotARank")
     print(str(exc))
@@ -1775,27 +2419,33 @@ def test_lineage_at_rank_rank_not_avail():

 def test_lineage_at_rank_1():
-    lin1 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__f')
-    print(lin1.lineage_at_rank('superkingdom'))
-
-    assert lin1.lineage_at_rank('superkingdom') == (LineagePair(rank='superkingdom', name='d__a', taxid=None),)
-    print(lin1.lineage_at_rank('class'))
-    assert lin1.lineage_at_rank('class') == (LineagePair(rank='superkingdom', name='d__a', taxid=None),
-                                             LineagePair(rank='phylum', name='p__b', taxid=None),
-                                             LineagePair(rank='class', name='c__c', taxid=None))
+    lin1 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
+    print(lin1.lineage_at_rank("superkingdom"))
+
+    assert lin1.lineage_at_rank("superkingdom") == (
+        LineagePair(rank="superkingdom", name="d__a", taxid=None),
+    )
+    print(lin1.lineage_at_rank("class"))
+    assert lin1.lineage_at_rank("class") == (
+        LineagePair(rank="superkingdom", name="d__a", taxid=None),
+        LineagePair(rank="phylum", name="p__b", taxid=None),
+        LineagePair(rank="class", name="c__c", taxid=None),
+    )


 def test_lineage_at_rank_below_rank():
-    lin1 = RankLineageInfo(lineage_str = 'd__a;p__b;c__c;o__d;f__f')
-    print(lin1.lineage_at_rank('superkingdom'))
+    lin1 = RankLineageInfo(lineage_str="d__a;p__b;c__c;o__d;f__f")
+    print(lin1.lineage_at_rank("superkingdom"))
     # if rank is not provided, we only return the filled lineage, to follow original pop_to_rank behavior.
-    print(lin1.lineage_at_rank('genus'))
-    assert lin1.lineage_at_rank('genus') == (LineagePair(rank='superkingdom', name='d__a', taxid=None),
-                                             LineagePair(rank='phylum', name='p__b', taxid=None),
-                                             LineagePair(rank='class', name='c__c', taxid=None),
-                                             LineagePair(rank='order', name='o__d', taxid=None),
-                                             LineagePair(rank='family', name='f__f', taxid=None))
+    print(lin1.lineage_at_rank("genus"))
+    assert lin1.lineage_at_rank("genus") == (
+        LineagePair(rank="superkingdom", name="d__a", taxid=None),
+        LineagePair(rank="phylum", name="p__b", taxid=None),
+        LineagePair(rank="class", name="c__c", taxid=None),
+        LineagePair(rank="order", name="o__d", taxid=None),
+        LineagePair(rank="family", name="f__f", taxid=None),
+    )


 def test_TaxResult_get_match_lineage_1():
@@ -1825,13 +2475,15 @@ def test_TaxResult_get_match_lineage_skip_ident():
     gA = {"name": "gA.1 name"}
     taxres = make_TaxResult(gA)
-    taxres.get_match_lineage(tax_assignments=taxD, skip_idents=['gA'])
+    taxres.get_match_lineage(tax_assignments=taxD, skip_idents=["gA"])
     print("skipped_ident?: ", taxres.skipped_ident)
     print("missed_ident?: ", taxres.missed_ident)
     assert taxres.skipped_ident == True
     assert taxres.lineageInfo == RankLineageInfo()
     assert taxres.lineageInfo.display_lineage() == ""
-    assert taxres.lineageInfo.display_lineage(null_as_unclassified=True) == "unclassified"
+    assert (
+        taxres.lineageInfo.display_lineage(null_as_unclassified=True) == "unclassified"
+    )


 def test_TaxResult_get_match_lineage_missed_ident_fail_on_missing():
@@ -1840,14 +2492,16 @@ def test_TaxResult_get_match_lineage_missed_ident_fail_on_missing():
     gA = {"name": "gA.1 name"}
     taxres = make_TaxResult(gA)
-    taxres.get_match_lineage(tax_assignments=taxD, skip_idents=['gB'])
+    taxres.get_match_lineage(tax_assignments=taxD, skip_idents=["gB"])
     print("skipped_ident?: ", taxres.skipped_ident)
     print("missed_ident?: ", taxres.missed_ident)
     assert taxres.skipped_ident == False
     assert taxres.missed_ident == True
     assert taxres.lineageInfo == RankLineageInfo()
     assert taxres.lineageInfo.display_lineage() == ""
-    assert taxres.lineageInfo.display_lineage(null_as_unclassified=True) == "unclassified"
+    assert (
+        taxres.lineageInfo.display_lineage(null_as_unclassified=True) == "unclassified"
+    )


 def test_TaxResult_get_match_lineage_missed_ident_fail_on_missing():
@@ -1857,7 +2511,9 @@ def test_TaxResult_get_match_lineage_missed_ident_fail_on_missing():
     gA = {"name": "gA.1 name"}
     taxres = make_TaxResult(gA)
     with pytest.raises(ValueError) as exc:
-        taxres.get_match_lineage(tax_assignments=taxD, skip_idents=['gB'], fail_on_missing_taxonomy=True)
+        taxres.get_match_lineage(
+            tax_assignments=taxD, skip_idents=["gB"], fail_on_missing_taxonomy=True
+        )
     print(str(exc))
     assert "Error: ident 'gA' is not in the taxonomy database." in str(exc)
@@ -1881,7 +2537,16 @@ def test_QueryTaxResult():
     assert q_res.skipped_idents == set()
     assert q_res.missed_idents == set()
     assert q_res.summarized_lineage_results == {}
-    taxranks = ('superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'strain')
+    taxranks = (
+        "superkingdom",
+        "phylum",
+        "class",
+        "order",
+        "family",
+        "genus",
+        "species",
+        "strain",
+    )
     assert q_res.ranks == taxranks
     assert q_res.ascending_ranks == taxranks[::-1]
@@ -1891,7 +2556,7 @@ def test_QueryTaxResult_add_incompatible():
     tax_info = [("gA", "a;b;c")]
     taxD = make_mini_taxonomy(tax_info=tax_info)
     taxres = make_TaxResult(taxD=taxD)
-    taxres2 = make_TaxResult({'query_name': 'q2'}, taxD=taxD)
+    taxres2 = make_TaxResult({"query_name": "q2"}, taxD=taxD)
     # initialize
     q_res = QueryTaxResult(taxres.query_info)
     # check that new querytaxres is compatible with taxres and not taxres2
@@ -1906,22 +2571,25 @@ def test_QueryTaxResult_add_incompatible():

 def test_QueryTaxResult_add_without_tax_info():
     "initialize and add a taxresult with missed ident"
-    taxres = make_TaxResult() # do not add taxonomic info
+    taxres = make_TaxResult()  # do not add taxonomic info
     # initialize
     q_res = QueryTaxResult(taxres.query_info)
     print("attempted to add lineage info?: ", taxres.match_lineage_attempted)
     with pytest.raises(ValueError) as exc:
         q_res.add_taxresult(taxres)
     print(str(exc))
-    assert "Error: Cannot add TaxResult. Please use get_match_lineage() to add taxonomic lineage information first." in str(exc)
-
-
+    assert (
+        "Error: Cannot add TaxResult. Please use get_match_lineage() to add taxonomic lineage information first."
+        in str(exc)
+    )
+
+
 def test_QueryTaxResult_add_skipped_ident():
     "initialize and add a taxresult with skipped ident"
     gA_tax = ("gA", "a;b;c")
     taxD = make_mini_taxonomy([gA_tax])
-    taxres = make_TaxResult(taxD=taxD, skip_idents = ['gA'])
-#    taxres.get_match_lineage(tax_assignments=taxD, skip_idents=['gA'])
+    taxres = make_TaxResult(taxD=taxD, skip_idents=["gA"])
+    # taxres.get_match_lineage(tax_assignments=taxD, skip_idents=['gA'])
     # initialize
     q_res = QueryTaxResult(taxres.query_info)
     q_res.add_taxresult(taxres)
@@ -1953,16 +2621,16 @@ def test_QueryTaxResult_track_missed_and_skipped():
     taxD = make_mini_taxonomy(tax_info=tax_info)
     # make results
     taxres = make_TaxResult()
-    taxres2 = make_TaxResult({"name": 'gB'}) # skipped
-    taxres3 = make_TaxResult({"name": 'gB'}) # skipped
-    taxres4 = make_TaxResult({"name": 'gC'}) # skipped
-    taxres5 = make_TaxResult({"name": 'gD'}) # missed
-    taxres6 = make_TaxResult({"name": 'gE'}) # missed
+    taxres2 = make_TaxResult({"name": "gB"})  # skipped
+    taxres3 = make_TaxResult({"name": "gB"})  # skipped
+    taxres4 = make_TaxResult({"name": "gC"})  # skipped
+    taxres5 = make_TaxResult({"name": "gD"})  # missed
+    taxres6 = make_TaxResult({"name": "gE"})  # missed
     # initialize
     q_res = QueryTaxResult(taxres.query_info)
     # add taxonomic info to taxres, add to q_res
     for n, tr in enumerate([taxres, taxres2, taxres3, taxres4, taxres5, taxres6]):
-        tr.get_match_lineage(tax_assignments=taxD, skip_idents=['gB', 'gC'])
+        tr.get_match_lineage(tax_assignments=taxD, skip_idents=["gB", "gC"])
         print("num: ", n)
         print("skipped?: ", tr.skipped_ident)
         print("missed?: ", tr.missed_ident)
@@ -1972,18 +2640,27 @@ def test_QueryTaxResult_track_missed_and_skipped():
     print(q_res.n_missed)
     assert q_res.n_missed == 2
     assert q_res.n_skipped == 3
-    assert 'gB' in q_res.skipped_idents
+    assert "gB" in q_res.skipped_idents
     assert len(q_res.skipped_idents) == 2
-    assert 'gD' in q_res.missed_idents
+    assert "gD" in q_res.missed_idents
     assert q_res.summarized_lineage_results == {}


 def test_QueryTaxResult_track_missed_and_skipped_using_fn():
     "make sure missed and skipped idents are being tracked. Same as above but use helper fn."
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}, {"name": 'gB'}, {"name": 'gC'}, {"name": 'gD'}, {"name": 'gE'}]
-    gres = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, skip_idents=['gB', 'gC'])
-    # should have 6 results for default query 'q1'
+    gather_results = [
+        {},
+        {"name": "gB"},
+        {"name": "gB"},
+        {"name": "gC"},
+        {"name": "gD"},
+        {"name": "gE"},
+    ]
+    gres = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, skip_idents=["gB", "gC"]
+    )
+    # should have 6 results for default query 'q1'
     print(gres.keys())
     q_res = next(iter(gres.values()))
     assert len(q_res.raw_taxresults) == 6
@@ -1991,237 +2668,411 @@ def test_QueryTaxResult_track_missed_and_skipped_using_fn():
     print(q_res.n_missed)
     assert q_res.n_missed == 2
     assert q_res.n_skipped == 3
-    assert 'gB' in q_res.skipped_idents
+    assert "gB" in q_res.skipped_idents
     assert len(q_res.skipped_idents) == 2
-    assert 'gD' in q_res.missed_idents
+    assert "gD" in q_res.missed_idents
     assert q_res.summarized_lineage_results == {}


 def test_QueryTaxResult_summarize_up_ranks_1():
     "basic functionality: summarize up ranks"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
+    gather_results = [{}, {"name": "gB"}]
     gres = make_QueryTaxResults(gather_info=gather_results, taxD=taxD)
     assert len(gres.keys()) == 1
     q_res = next(iter(gres.values()))
     # now summarize up the ranks
     q_res.summarize_up_ranks()
     assert len(q_res.raw_taxresults) == 2
-    #print(q_res.sum_uniq_weighted.values())
-    #print(q_res.sum_uniq_weighted['superkingdom'])
-    assert list(q_res.sum_uniq_weighted.keys()) == ['class', 'phylum', 'superkingdom']
-    assert q_res.sum_uniq_weighted['superkingdom'] == {RankLineageInfo(lineage_str="a"): approx(0.4)}
-    assert q_res.sum_uniq_to_query['superkingdom'] == {RankLineageInfo(lineage_str="a"): approx(0.2)}
-    assert q_res.sum_uniq_bp['superkingdom'] == {RankLineageInfo(lineage_str="a"): 40}
-    assert q_res.sum_uniq_weighted['phylum'] == {RankLineageInfo(lineage_str="a;b"): approx(0.4)}
+    # print(q_res.sum_uniq_weighted.values())
+    # print(q_res.sum_uniq_weighted['superkingdom'])
+    assert list(q_res.sum_uniq_weighted.keys()) == ["class", "phylum", "superkingdom"]
+    assert q_res.sum_uniq_weighted["superkingdom"] == {
+        RankLineageInfo(lineage_str="a"): approx(0.4)
+    }
+    assert q_res.sum_uniq_to_query["superkingdom"] == {
+        RankLineageInfo(lineage_str="a"): approx(0.2)
+    }
+    assert q_res.sum_uniq_bp["superkingdom"] == {RankLineageInfo(lineage_str="a"): 40}
+    assert q_res.sum_uniq_weighted["phylum"] == {
+        RankLineageInfo(lineage_str="a;b"): approx(0.4)
+    }
+    assert q_res.sum_uniq_to_query["phylum"] == {
+        RankLineageInfo(lineage_str="a;b"): approx(0.2)
+    }
+    assert q_res.sum_uniq_bp["phylum"] == {RankLineageInfo(lineage_str="a;b"): 40}
+    assert q_res.sum_uniq_weighted["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): approx(0.2),
+        RankLineageInfo(lineage_str="a;b;d"): approx(0.2),
+    }
+    assert q_res.sum_uniq_to_query["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): approx(0.1),
+        RankLineageInfo(lineage_str="a;b;d"): approx(0.1),
+    }
+    assert q_res.sum_uniq_bp["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): 20,
+        RankLineageInfo(lineage_str="a;b;d"): 20,
+    }


 def test_QueryTaxResult_summarize_up_ranks_2():
     "summarize up ranks: different values"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB','f_unique_weighted': 0.1,'f_unique_to_query': 0.05,'unique_intersect_bp': 10,}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [
+        {},
+        {
+            "name": "gB",
+            "f_unique_weighted": 0.1,
+            "f_unique_to_query": 0.05,
+            "unique_intersect_bp": 10,
+        },
+    ]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     # now summarize up the ranks
     q_res.summarize_up_ranks()
     assert len(q_res.raw_taxresults) == 2
     print(q_res.sum_uniq_weighted.values())
-    print(q_res.sum_uniq_weighted['superkingdom'])
-    assert q_res.sum_uniq_weighted['superkingdom'] == {RankLineageInfo(lineage_str="a"): approx(0.3)}
-    assert q_res.sum_uniq_to_query['superkingdom'] == {RankLineageInfo(lineage_str="a"): approx(0.15)}
-    assert q_res.sum_uniq_bp['superkingdom'] == {RankLineageInfo(lineage_str="a"): 30}
-    assert q_res.sum_uniq_weighted['phylum'] == {RankLineageInfo(lineage_str="a;b"): approx(0.3)}
-    assert q_res.sum_uniq_to_query['phylum'] == {RankLineageInfo(lineage_str="a;b"): approx(0.15)}
-    assert q_res.sum_uniq_bp['phylum'] == {RankLineageInfo(lineage_str="a;b"): 30}
-    assert q_res.sum_uniq_weighted['class'] == {RankLineageInfo(lineage_str="a;b;c"): approx(0.2),
-                                                RankLineageInfo(lineage_str="a;b;d"): approx(0.1)}
-    assert q_res.sum_uniq_to_query['class'] == {RankLineageInfo(lineage_str="a;b;c"): approx(0.1),
-                                                RankLineageInfo(lineage_str="a;b;d"): approx(0.05)}
-    assert q_res.sum_uniq_bp['class'] == {RankLineageInfo(lineage_str="a;b;c"): 20,
-                                          RankLineageInfo(lineage_str="a;b;d"): 10}
+    print(q_res.sum_uniq_weighted["superkingdom"])
+    assert q_res.sum_uniq_weighted["superkingdom"] == {
+        RankLineageInfo(lineage_str="a"): approx(0.3)
+    }
+    assert q_res.sum_uniq_to_query["superkingdom"] == {
+        RankLineageInfo(lineage_str="a"): approx(0.15)
+    }
+    assert q_res.sum_uniq_bp["superkingdom"] == {RankLineageInfo(lineage_str="a"): 30}
+    assert q_res.sum_uniq_weighted["phylum"] == {
+        RankLineageInfo(lineage_str="a;b"): approx(0.3)
+    }
+    assert q_res.sum_uniq_to_query["phylum"] == {
+        RankLineageInfo(lineage_str="a;b"): approx(0.15)
+    }
+    assert q_res.sum_uniq_bp["phylum"] == {RankLineageInfo(lineage_str="a;b"): 30}
+    assert q_res.sum_uniq_weighted["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): approx(0.2),
+        RankLineageInfo(lineage_str="a;b;d"): approx(0.1),
+    }
+    assert q_res.sum_uniq_to_query["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): approx(0.1),
+        RankLineageInfo(lineage_str="a;b;d"): approx(0.05),
+    }
+    assert q_res.sum_uniq_bp["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): 20,
+        RankLineageInfo(lineage_str="a;b;d"): 10,
+    }


 def test_QueryTaxResult_summarize_up_ranks_missing_lineage():
     "basic functionality: summarize up ranks"
     taxD = make_mini_taxonomy([("gA", "a;b;c")])
-    gather_results = [{}, {"name": 'gB'}]
+    gather_results = [{}, {"name": "gB"}]
     gres = make_QueryTaxResults(gather_info=gather_results, taxD=taxD)
     assert len(gres.keys()) == 1
     q_res = next(iter(gres.values()))
     # now summarize up the ranks
     q_res.summarize_up_ranks()
     assert len(q_res.raw_taxresults) == 2
-    #print(q_res.sum_uniq_weighted.values())
-    print(q_res.sum_uniq_weighted['superkingdom'])
-    assert q_res.sum_uniq_weighted['superkingdom'] == {RankLineageInfo(lineage_str="a"): approx(0.2)}
-    assert q_res.sum_uniq_to_query['superkingdom'] == {RankLineageInfo(lineage_str="a"): approx(0.1)}
-    assert q_res.sum_uniq_bp['superkingdom'] == {RankLineageInfo(lineage_str="a"): 20}
-    assert q_res.sum_uniq_weighted['phylum'] == {RankLineageInfo(lineage_str="a;b"): approx(0.2)}
-    assert q_res.sum_uniq_to_query['phylum'] == {RankLineageInfo(lineage_str="a;b"): approx(0.1)}
-    assert q_res.sum_uniq_bp['phylum'] == {RankLineageInfo(lineage_str="a;b"): 20}
-    assert q_res.sum_uniq_weighted['class'] == {RankLineageInfo(lineage_str="a;b;c"): approx(0.2)}
-    assert q_res.sum_uniq_to_query['class'] == {RankLineageInfo(lineage_str="a;b;c"): approx(0.1)}
-    assert q_res.sum_uniq_bp['class'] == {RankLineageInfo(lineage_str="a;b;c"): 20}
+    # print(q_res.sum_uniq_weighted.values())
+    print(q_res.sum_uniq_weighted["superkingdom"])
+    assert q_res.sum_uniq_weighted["superkingdom"] == {
+        RankLineageInfo(lineage_str="a"): approx(0.2)
+    }
+    assert q_res.sum_uniq_to_query["superkingdom"] == {
+        RankLineageInfo(lineage_str="a"): approx(0.1)
+    }
+    assert q_res.sum_uniq_bp["superkingdom"] == {RankLineageInfo(lineage_str="a"): 20}
+    assert q_res.sum_uniq_weighted["phylum"] == {
+        RankLineageInfo(lineage_str="a;b"): approx(0.2)
+    }
+    assert q_res.sum_uniq_to_query["phylum"] == {
+        RankLineageInfo(lineage_str="a;b"): approx(0.1)
+    }
+    assert q_res.sum_uniq_bp["phylum"] == {RankLineageInfo(lineage_str="a;b"): 20}
+    assert q_res.sum_uniq_weighted["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): approx(0.2)
+    }
+    assert q_res.sum_uniq_to_query["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): approx(0.1)
+    }
+    assert q_res.sum_uniq_bp["class"] == {RankLineageInfo(lineage_str="a;b;c"): 20}


 def test_QueryTaxResult_summarize_up_ranks_skipped_lineage():
     "basic functionality: summarize up ranks"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    gres = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, skip_idents=['gB'])
+    gather_results = [{}, {"name": "gB"}]
+    gres = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, skip_idents=["gB"]
+    )
     assert len(gres.keys()) == 1
     q_res = next(iter(gres.values()))
     # now summarize up the ranks
     q_res.summarize_up_ranks()
     assert len(q_res.raw_taxresults) == 2
-    assert list(q_res.sum_uniq_weighted.keys()) == ['class', 'phylum', 'superkingdom']
-    #print(q_res.sum_uniq_weighted.values())
-    print(q_res.sum_uniq_weighted['superkingdom'])
-    assert q_res.sum_uniq_weighted['superkingdom'] == {RankLineageInfo(lineage_str="a"): approx(0.2)}
-    assert q_res.sum_uniq_to_query['superkingdom'] == {RankLineageInfo(lineage_str="a"): approx(0.1)}
-    assert q_res.sum_uniq_bp['superkingdom'] == {RankLineageInfo(lineage_str="a"): 20}
-    assert q_res.sum_uniq_weighted['phylum'] == {RankLineageInfo(lineage_str="a;b"): approx(0.2)}
-    assert q_res.sum_uniq_to_query['phylum'] == {RankLineageInfo(lineage_str="a;b"): approx(0.1)}
-    assert q_res.sum_uniq_bp['phylum'] == {RankLineageInfo(lineage_str="a;b"): 20}
-    assert q_res.sum_uniq_weighted['class'] == {RankLineageInfo(lineage_str="a;b;c"): approx(0.2)}
-    assert q_res.sum_uniq_to_query['class'] == {RankLineageInfo(lineage_str="a;b;c"): approx(0.1)}
-    assert q_res.sum_uniq_bp['class'] == {RankLineageInfo(lineage_str="a;b;c"): 20}
+    assert list(q_res.sum_uniq_weighted.keys()) == ["class", "phylum", "superkingdom"]
+    # print(q_res.sum_uniq_weighted.values())
+    print(q_res.sum_uniq_weighted["superkingdom"])
+    assert q_res.sum_uniq_weighted["superkingdom"] == {
+        RankLineageInfo(lineage_str="a"): approx(0.2)
+    }
+    assert q_res.sum_uniq_to_query["superkingdom"] == {
+        RankLineageInfo(lineage_str="a"): approx(0.1)
+    }
+    assert q_res.sum_uniq_bp["superkingdom"] == {RankLineageInfo(lineage_str="a"): 20}
+    assert q_res.sum_uniq_weighted["phylum"] == {
+        RankLineageInfo(lineage_str="a;b"): approx(0.2)
+    }
+    assert q_res.sum_uniq_to_query["phylum"] == {
+        RankLineageInfo(lineage_str="a;b"): approx(0.1)
+    }
+    assert q_res.sum_uniq_bp["phylum"] == {RankLineageInfo(lineage_str="a;b"): 20}
+    assert q_res.sum_uniq_weighted["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): approx(0.2)
+    }
+    assert q_res.sum_uniq_to_query["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): approx(0.1)
+    }
+    assert q_res.sum_uniq_bp["class"] == {RankLineageInfo(lineage_str="a;b;c"): 20}


 def test_QueryTaxResult_summarize_up_ranks_perfect_match():
     "summarize up ranks: different values"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{'f_unique_to_query': 1.0}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{"f_unique_to_query": 1.0}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     # now summarize up the ranks
     q_res.summarize_up_ranks()
     assert len(q_res.raw_taxresults) == 1
     print(q_res.sum_uniq_weighted.values())
-    print(q_res.sum_uniq_to_query['superkingdom'])
-    assert list(q_res.sum_uniq_to_query['superkingdom'].values()) == [1.0]
-    assert 'gA' in q_res.perfect_match
+    print(q_res.sum_uniq_to_query["superkingdom"])
+    assert list(q_res.sum_uniq_to_query["superkingdom"].values()) == [1.0]
+    assert "gA" in q_res.perfect_match


 def test_QueryTaxResult_summarize_up_ranks_already_summarized():
     "summarize up ranks: error, already summarized"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{'f_unique_to_query': 1.0}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{"f_unique_to_query": 1.0}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     # now summarize up the ranks
     q_res.summarize_up_ranks()
     with pytest.raises(ValueError) as exc:
         q_res.summarize_up_ranks()
     print(str(exc))
     assert "Error: already summarized" in str(exc)
-
+

 def test_QueryTaxResult_summarize_up_ranks_already_summarized_force():
     "summarize up ranks: already summarized but force"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB','f_unique_weighted': 0.1,'f_unique_to_query': 0.05,'unique_intersect_bp': 10,}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [
+        {},
+        {
+            "name": "gB",
+            "f_unique_weighted": 0.1,
+            "f_unique_to_query": 0.05,
+            "unique_intersect_bp": 10,
+        },
+    ]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     # now summarize up the ranks
     q_res.summarize_up_ranks()
     q_res.summarize_up_ranks(force_resummarize=True)
-    assert list(q_res.sum_uniq_weighted.keys()) == ['class', 'phylum', 'superkingdom']
+    assert list(q_res.sum_uniq_weighted.keys()) == ["class", "phylum", "superkingdom"]

-    #check that all results are still good
+    # check that all results are still good
     assert len(q_res.raw_taxresults) == 2
-    assert q_res.sum_uniq_weighted['superkingdom'] == {RankLineageInfo(lineage_str="a"): approx(0.3)}
-    assert q_res.sum_uniq_weighted['phylum'] == {RankLineageInfo(lineage_str="a;b"): approx(0.3)}
-    assert q_res.sum_uniq_to_query['phylum'] == {RankLineageInfo(lineage_str="a;b"): approx(0.15)}
-    assert q_res.sum_uniq_bp['phylum'] == {RankLineageInfo(lineage_str="a;b"): 30}
-    assert q_res.sum_uniq_to_query['class'] == {RankLineageInfo(lineage_str="a;b;c"): approx(0.1),
-                                                RankLineageInfo(lineage_str="a;b;d"): approx(0.05)}
-    assert q_res.sum_uniq_weighted['class'] == {RankLineageInfo(lineage_str="a;b;c"): approx(0.2),
-                                                RankLineageInfo(lineage_str="a;b;d"): approx(0.1)}
-    assert q_res.sum_uniq_bp['class'] == {RankLineageInfo(lineage_str="a;b;c"): 20,
-                                          RankLineageInfo(lineage_str="a;b;d"): 10}
+    assert q_res.sum_uniq_weighted["superkingdom"] == {
+        RankLineageInfo(lineage_str="a"): approx(0.3)
+    }
+    assert q_res.sum_uniq_weighted["phylum"] == {
+        RankLineageInfo(lineage_str="a;b"): approx(0.3)
+    }
+    assert q_res.sum_uniq_to_query["phylum"] == {
+        RankLineageInfo(lineage_str="a;b"): approx(0.15)
+    }
+    assert q_res.sum_uniq_bp["phylum"] == {RankLineageInfo(lineage_str="a;b"): 30}
+    assert q_res.sum_uniq_to_query["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): approx(0.1),
+        RankLineageInfo(lineage_str="a;b;d"): approx(0.05),
+    }
+    assert q_res.sum_uniq_weighted["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): approx(0.2),
+        RankLineageInfo(lineage_str="a;b;d"): approx(0.1),
+    }
+    assert q_res.sum_uniq_bp["class"] == {
+        RankLineageInfo(lineage_str="a;b;c"): 20,
+        RankLineageInfo(lineage_str="a;b;d"): 10,
+    }


 def test_QueryTaxResult_summarize_up_ranks_single_rank():
     "summarize up ranks: different values"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB','f_unique_weighted': 0.1,'f_unique_to_query': 0.05,'unique_intersect_bp': 10,}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [
+        {},
+        {
+            "name": "gB",
+            "f_unique_weighted": 0.1,
+            "f_unique_to_query": 0.05,
+            "unique_intersect_bp": 10,
+        },
+    ]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     # now summarize up the ranks
-    q_res.summarize_up_ranks(single_rank='phylum')
+    q_res.summarize_up_ranks(single_rank="phylum")
     assert len(q_res.raw_taxresults) == 2
-    assert list(q_res.sum_uniq_weighted.keys()) == ['phylum']
+    assert list(q_res.sum_uniq_weighted.keys()) == ["phylum"]
     print(q_res.sum_uniq_weighted.keys())
     print(q_res.sum_uniq_weighted.values())
-    print(q_res.sum_uniq_weighted['phylum'])
-    assert q_res.sum_uniq_weighted['phylum'] == {RankLineageInfo(lineage_str="a;b"): approx(0.3)}
-    assert list(q_res.sum_uniq_to_query['phylum'].values()) == [approx(0.15)]
-    assert list(q_res.sum_uniq_bp['phylum'].values()) == [30]
-    assert q_res.summarized_ranks == ['phylum']
+    print(q_res.sum_uniq_weighted["phylum"])
+    assert q_res.sum_uniq_weighted["phylum"] == {
+        RankLineageInfo(lineage_str="a;b"): approx(0.3)
+    }
+    assert list(q_res.sum_uniq_to_query["phylum"].values()) == [approx(0.15)]
+    assert list(q_res.sum_uniq_bp["phylum"].values()) == [30]
+    assert q_res.summarized_ranks == ["phylum"]
+

 def test_QueryTaxResult_summarize_up_ranks_single_rank_not_available():
     "summarize up ranks: different values"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB','f_unique_weighted': 0.1,'f_unique_to_query': 0.05,'unique_intersect_bp': 10,}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [
+        {},
+        {
+            "name": "gB",
+            "f_unique_weighted": 0.1,
+            "f_unique_to_query": 0.05,
+            "unique_intersect_bp": 10,
+        },
+    ]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     # now summarize up the ranks
     with pytest.raises(ValueError) as exc:
-        q_res.summarize_up_ranks(single_rank='NotARank')
+        q_res.summarize_up_ranks(single_rank="NotARank")
     print(str(exc))
-    assert "Error: rank 'NotARank' not in available ranks (strain, species, genus, family, order, class, phylum, superkingdom)" in str(exc)
+    assert (
+        "Error: rank 'NotARank' not in available ranks (strain, species, genus, family, order, class, phylum, superkingdom)"
+        in str(exc)
+    )


 def test_QueryTaxResult_summarize_up_ranks_single_rank_not_filled():
     "summarize up ranks: different values"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB','f_unique_weighted': 0.1,'f_unique_to_query': 0.05,'unique_intersect_bp': 10,}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [
+        {},
+        {
+            "name": "gB",
+            "f_unique_weighted": 0.1,
+            "f_unique_to_query": 0.05,
+            "unique_intersect_bp": 10,
+        },
+    ]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     # now summarize up the ranks
     with pytest.raises(ValueError) as exc:
-        q_res.summarize_up_ranks(single_rank='species')
+        q_res.summarize_up_ranks(single_rank="species")
     print(str(exc))
-    assert "Error: rank 'species' was not available for any matching lineages." in str(exc)
+    assert "Error: rank 'species' was not available for any matching lineages." in str(
+        exc
+    )


 def test_QueryTaxResult_build_summarized_result_1():
     "basic functionality: build summarized_result"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     q_res.build_summarized_result()
     print(q_res.summarized_lineage_results.keys())
-    sk = [SummarizedGatherResult(rank='superkingdom', fraction=0.2, f_weighted_at_rank=0.4,
-                                 lineage=RankLineageInfo(lineage_str='a'),
-                                 bp_match_at_rank=40, query_ani_at_rank=approx(0.95, rel=1e-2)),
-          SummarizedGatherResult(rank='superkingdom', fraction=0.8, f_weighted_at_rank=0.6,
-                                 lineage=RankLineageInfo(), bp_match_at_rank=60, query_ani_at_rank=None)]
-    print(q_res.summarized_lineage_results['superkingdom'])
-    assert q_res.summarized_lineage_results['superkingdom'] == sk
-    print(q_res.summarized_lineage_results['phylum'])
-    phy = [SummarizedGatherResult(rank='phylum', fraction=0.2, f_weighted_at_rank=0.4,
-                                  lineage=RankLineageInfo(lineage_str='a;b'),
-                                  bp_match_at_rank=40, query_ani_at_rank=approx(0.95, rel=1e-2)),
-           SummarizedGatherResult(rank='phylum', fraction=0.8, f_weighted_at_rank=0.6,
-                                  lineage=RankLineageInfo(), bp_match_at_rank=60, query_ani_at_rank=None)]
-    assert q_res.summarized_lineage_results['phylum'] == phy
-    print(q_res.summarized_lineage_results['class'])
-    cl = [SummarizedGatherResult(rank='class', fraction=0.1, f_weighted_at_rank=0.2,
-                                 lineage=RankLineageInfo(lineage_str='a;b;c'),
-                                 bp_match_at_rank=20, query_ani_at_rank=approx(0.93, rel=1e-2)),
-          SummarizedGatherResult(rank='class', fraction=0.1, f_weighted_at_rank=0.2,
-                                 lineage=RankLineageInfo(lineage_str='a;b;d'),
-                                 bp_match_at_rank=20, query_ani_at_rank=approx(0.93, rel=1e-2)),
-          SummarizedGatherResult(rank='class', fraction=0.8, f_weighted_at_rank=0.6,
-                                 lineage=RankLineageInfo(), bp_match_at_rank=60, query_ani_at_rank=None)]
-    assert q_res.summarized_lineage_results['class'] == cl
-
-    assert q_res.total_f_weighted['phylum'] == approx(0.4)
-    assert q_res.total_f_classified['class'] == approx(0.2)
-    assert q_res.total_bp_classified['superkingdom'] == 40
+    sk = [
+        SummarizedGatherResult(
+            rank="superkingdom",
+            fraction=0.2,
+            f_weighted_at_rank=0.4,
+            lineage=RankLineageInfo(lineage_str="a"),
+            bp_match_at_rank=40,
+            query_ani_at_rank=approx(0.95, rel=1e-2),
+        ),
+        SummarizedGatherResult(
+            rank="superkingdom",
+            fraction=0.8,
+            f_weighted_at_rank=0.6,
+            lineage=RankLineageInfo(),
+            bp_match_at_rank=60,
+            query_ani_at_rank=None,
+        ),
+    ]
+    print(q_res.summarized_lineage_results["superkingdom"])
+    assert q_res.summarized_lineage_results["superkingdom"] == sk
+    print(q_res.summarized_lineage_results["phylum"])
+    phy = [
+        SummarizedGatherResult(
+            rank="phylum",
+            fraction=0.2,
+            f_weighted_at_rank=0.4,
+            lineage=RankLineageInfo(lineage_str="a;b"),
+            bp_match_at_rank=40,
+            query_ani_at_rank=approx(0.95, rel=1e-2),
+        ),
+        SummarizedGatherResult(
+            rank="phylum",
+            fraction=0.8,
+            f_weighted_at_rank=0.6,
+            lineage=RankLineageInfo(),
+            bp_match_at_rank=60,
+            query_ani_at_rank=None,
+        ),
+    ]
+    assert q_res.summarized_lineage_results["phylum"] == phy
+    print(q_res.summarized_lineage_results["class"])
+    cl = [
+        SummarizedGatherResult(
+            rank="class",
+            fraction=0.1,
+            f_weighted_at_rank=0.2,
+            lineage=RankLineageInfo(lineage_str="a;b;c"),
+            bp_match_at_rank=20,
+            query_ani_at_rank=approx(0.93, rel=1e-2),
+        ),
+        SummarizedGatherResult(
+            rank="class",
+            fraction=0.1,
+            f_weighted_at_rank=0.2,
+            lineage=RankLineageInfo(lineage_str="a;b;d"),
+            bp_match_at_rank=20,
+            query_ani_at_rank=approx(0.93, rel=1e-2),
+        ),
+        SummarizedGatherResult(
+            rank="class",
+            fraction=0.8,
+            f_weighted_at_rank=0.6,
+            lineage=RankLineageInfo(),
+            bp_match_at_rank=60,
+            query_ani_at_rank=None,
+        ),
+    ]
+    assert q_res.summarized_lineage_results["class"] == cl
+
+    assert q_res.total_f_weighted["phylum"] == approx(0.4)
+    assert q_res.total_f_classified["class"] == approx(0.2)
+    assert q_res.total_bp_classified["superkingdom"] == 40


 def test_QueryTaxResult_build_summarized_result_2():
@@ -2231,19 +3082,39 @@ def test_QueryTaxResult_build_summarized_result_2():
     gB_tax = ("gB", "a;c")
     taxD = make_mini_taxonomy([gA_tax, gB_tax])
     # make gather results
-    gather_results = [{'query_name': 'queryA', 'name': 'gA', 'f_unique_weighted': 0.5,'f_unique_to_query': 0.5,'unique_intersect_bp': 50},
-                      {'query_name': 'queryA', "name": 'gB', 'f_unique_weighted': 0.4,'f_unique_to_query': 0.3,'unique_intersect_bp': 30},
-                      {'query_name': 'queryB', "name": 'gB', 'f_unique_weighted': 0.3,'f_unique_to_query': 0.3,'unique_intersect_bp': 30}]
+    gather_results = [
+        {
+            "query_name": "queryA",
+            "name": "gA",
+            "f_unique_weighted": 0.5,
+            "f_unique_to_query": 0.5,
+            "unique_intersect_bp": 50,
+        },
+        {
+            "query_name": "queryA",
+            "name": "gB",
+            "f_unique_weighted": 0.4,
+            "f_unique_to_query": 0.3,
+            "unique_intersect_bp": 30,
+        },
+        {
+            "query_name": "queryB",
+            "name": "gB",
+            "f_unique_weighted": 0.3,
+            "f_unique_to_query": 0.3,
+            "unique_intersect_bp": 30,
+        },
+    ]
     gres = make_QueryTaxResults(gather_info=gather_results, taxD=taxD)
-
+
     for query_name, q_res in gres.items():
-        q_res.build_summarized_result() # summarize and build result
-        sk = q_res.summarized_lineage_results['superkingdom']
-        phy = q_res.summarized_lineage_results['phylum']
+        q_res.build_summarized_result()  # summarize and build result
+        sk = q_res.summarized_lineage_results["superkingdom"]
+        phy = q_res.summarized_lineage_results["phylum"]
         assert len(sk) == 2
         assert sk[0].lineage == RankLineageInfo(lineage_str="a")
         print(phy)
-        if query_name == 'queryA':
+        if query_name == "queryA":
             # check superkingdom results
             assert sk[0].fraction == approx(0.8)
             assert sk[0].f_weighted_at_rank == approx(0.9)
@@ -2257,16 +3128,16 @@ def test_QueryTaxResult_build_summarized_result_2():
             assert phy[0].fraction == approx(0.5)
             assert phy[0].f_weighted_at_rank == approx(0.5)
             assert phy[0].bp_match_at_rank == 50
-            assert phy[0].lineage == RankLineageInfo(lineage_str="a;b")
+            assert phy[0].lineage == RankLineageInfo(lineage_str="a;b")
             assert phy[1].fraction == approx(0.3)
             assert phy[1].f_weighted_at_rank == approx(0.4)
             assert phy[1].bp_match_at_rank == 30
-            assert phy[1].lineage == RankLineageInfo(lineage_str="a;c")
+            assert phy[1].lineage == RankLineageInfo(lineage_str="a;c")
             assert phy[2].fraction == approx(0.2)
             assert phy[2].f_weighted_at_rank == approx(0.1)
             assert phy[2].bp_match_at_rank == 20
             assert phy[2].lineage == RankLineageInfo()
-        if query_name == 'queryB':
+        if query_name == "queryB":
             # check superkingdom results
             assert sk[0].fraction == approx(0.3)
             assert sk[0].f_weighted_at_rank == approx(0.3)
@@ -2280,7 +3151,7 @@ def test_QueryTaxResult_build_summarized_result_2():
             assert phy[0].fraction == approx(0.3)
             assert phy[0].f_weighted_at_rank == approx(0.3)
             assert phy[0].bp_match_at_rank == 30
-            assert phy[0].lineage == RankLineageInfo(lineage_str="a;c")
+            assert phy[0].lineage == RankLineageInfo(lineage_str="a;c")
             assert phy[1].fraction == approx(0.7)
             assert phy[1].f_weighted_at_rank == approx(0.7)
             assert phy[1].bp_match_at_rank == 70
@@ -2290,91 +3161,183 @@ def test_QueryTaxResult_build_summarized_result_missing_lineage():
     "build summarized_result with missing lineage"
     taxD = make_mini_taxonomy([("gA", "a;b;c")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     q_res.build_summarized_result()
     print(q_res.summarized_lineage_results.keys())
-    print(q_res.summarized_lineage_results['superkingdom'])
-
-    sk = [SummarizedGatherResult(rank='superkingdom', fraction=0.1, f_weighted_at_rank=0.2,
-                                 lineage=RankLineageInfo(lineage_str="a"),
-                                 bp_match_at_rank=20, query_ani_at_rank=approx(0.928, rel=1e-2)),
-          SummarizedGatherResult(rank='superkingdom', fraction=0.9, lineage=RankLineageInfo(),f_weighted_at_rank=0.8,
-                                 bp_match_at_rank=80, query_ani_at_rank=None)]
-    assert q_res.summarized_lineage_results['superkingdom'] == sk
-    print(q_res.summarized_lineage_results['phylum'])
-    phy = [SummarizedGatherResult(rank='phylum', fraction=0.1, f_weighted_at_rank=0.2,
-                                  lineage=RankLineageInfo(lineage_str="a;b"),
-                                  bp_match_at_rank=20, query_ani_at_rank=approx(0.928, rel=1e-2)),
-           SummarizedGatherResult(rank='phylum', fraction=0.9, lineage=RankLineageInfo(),f_weighted_at_rank=0.8,
-                                  bp_match_at_rank=80, query_ani_at_rank=None)]
-    assert q_res.summarized_lineage_results['phylum'] == phy
-    print(q_res.summarized_lineage_results['class'])
-    cl = [SummarizedGatherResult(rank='class', fraction=0.1, lineage= RankLineageInfo(lineage_str="a;b;c"),
-                                 f_weighted_at_rank=0.2, bp_match_at_rank=20, query_ani_at_rank=approx(0.928, rel=1e-2)),
-          SummarizedGatherResult(rank='class', fraction=0.9, lineage=RankLineageInfo(), f_weighted_at_rank=0.8,
-                                 bp_match_at_rank=80, query_ani_at_rank=None)]
-    assert q_res.summarized_lineage_results['class'] == cl
-
-    assert q_res.total_f_weighted['phylum'] == approx(0.2)
-    assert q_res.total_f_classified['class'] == approx(0.1)
-    assert q_res.total_bp_classified['superkingdom'] == 20
+    print(q_res.summarized_lineage_results["superkingdom"])
+
+    sk = [
+        SummarizedGatherResult(
+            rank="superkingdom",
+            fraction=0.1,
+            f_weighted_at_rank=0.2,
+            lineage=RankLineageInfo(lineage_str="a"),
+            bp_match_at_rank=20,
+            query_ani_at_rank=approx(0.928, rel=1e-2),
+        ),
+        SummarizedGatherResult(
+            rank="superkingdom",
+            fraction=0.9,
+            lineage=RankLineageInfo(),
+            f_weighted_at_rank=0.8,
+            bp_match_at_rank=80,
+            query_ani_at_rank=None,
+        ),
+    ]
+    assert q_res.summarized_lineage_results["superkingdom"] == sk
+    print(q_res.summarized_lineage_results["phylum"])
+    phy = [
+        SummarizedGatherResult(
+            rank="phylum",
+            fraction=0.1,
+            f_weighted_at_rank=0.2,
+            lineage=RankLineageInfo(lineage_str="a;b"),
+            bp_match_at_rank=20,
+            query_ani_at_rank=approx(0.928, rel=1e-2),
+        ),
+        SummarizedGatherResult(
+            rank="phylum",
+            fraction=0.9,
+            lineage=RankLineageInfo(),
+            f_weighted_at_rank=0.8,
+            bp_match_at_rank=80,
+            query_ani_at_rank=None,
+        ),
+    ]
+    assert q_res.summarized_lineage_results["phylum"] == phy
+    print(q_res.summarized_lineage_results["class"])
+    cl = [
+        SummarizedGatherResult(
+            rank="class",
+            fraction=0.1,
+            lineage=RankLineageInfo(lineage_str="a;b;c"),
+            f_weighted_at_rank=0.2,
+            bp_match_at_rank=20,
+            query_ani_at_rank=approx(0.928, rel=1e-2),
+        ),
+        SummarizedGatherResult(
+            rank="class",
+            fraction=0.9,
+            lineage=RankLineageInfo(),
+            f_weighted_at_rank=0.8,
+            bp_match_at_rank=80,
+            query_ani_at_rank=None,
+        ),
+    ]
+    assert q_res.summarized_lineage_results["class"] == cl
+
+    assert q_res.total_f_weighted["phylum"] == approx(0.2)
+    assert q_res.total_f_classified["class"] == approx(0.1)
+    assert q_res.total_bp_classified["superkingdom"] == 20


 def test_QueryTaxResult_build_summarized_result_skipped_lineage():
     "build summarized_result with skipped lineage"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, skip_idents=['gB'])
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True, skip_idents=["gB"]
+    )
     q_res.build_summarized_result()
     print(q_res.summarized_lineage_results.keys())
-    print(q_res.summarized_lineage_results['superkingdom'])
-
-    sk = [SummarizedGatherResult(rank='superkingdom', fraction=0.1, f_weighted_at_rank=0.2,
-                                 lineage=RankLineageInfo(lineage_str="a"),
-                                 bp_match_at_rank=20, query_ani_at_rank=approx(0.928, rel=1e-2)),
-          SummarizedGatherResult(rank='superkingdom', fraction=0.9, lineage=RankLineageInfo(),f_weighted_at_rank=0.8,
-                                 bp_match_at_rank=80, query_ani_at_rank=None)]
-    assert q_res.summarized_lineage_results['superkingdom'] == sk
-    print(q_res.summarized_lineage_results['phylum'])
-    phy = [SummarizedGatherResult(rank='phylum', fraction=0.1, lineage=RankLineageInfo(lineage_str="a;b"),
-                                  f_weighted_at_rank=0.2, bp_match_at_rank=20, query_ani_at_rank=approx(0.928, rel=1e-2)),
-           SummarizedGatherResult(rank='phylum', fraction=0.9, lineage=RankLineageInfo(), f_weighted_at_rank=0.8, bp_match_at_rank=80,
-                                  query_ani_at_rank=None)]
+    print(q_res.summarized_lineage_results["superkingdom"])
+
+    sk = [
+        SummarizedGatherResult(
+            rank="superkingdom",
+            fraction=0.1,
+            f_weighted_at_rank=0.2,
+            lineage=RankLineageInfo(lineage_str="a"),
+            bp_match_at_rank=20,
+            query_ani_at_rank=approx(0.928, rel=1e-2),
+        ),
+        SummarizedGatherResult(
+            rank="superkingdom",
+            fraction=0.9,
+            lineage=RankLineageInfo(),
+            f_weighted_at_rank=0.8,
+            bp_match_at_rank=80,
+            query_ani_at_rank=None,
+        ),
+    ]
+    assert q_res.summarized_lineage_results["superkingdom"] == sk
+    print(q_res.summarized_lineage_results["phylum"])
+    phy = [
+        SummarizedGatherResult(
+            rank="phylum",
+            fraction=0.1,
+            lineage=RankLineageInfo(lineage_str="a;b"),
+            f_weighted_at_rank=0.2,
+            bp_match_at_rank=20,
+            query_ani_at_rank=approx(0.928, rel=1e-2),
+        ),
+        SummarizedGatherResult(
+            rank="phylum",
+            fraction=0.9,
+            lineage=RankLineageInfo(),
+            f_weighted_at_rank=0.8,
+            bp_match_at_rank=80,
+            query_ani_at_rank=None,
+        ),
+    ]
+    assert q_res.summarized_lineage_results["phylum"] == phy
+    print(q_res.summarized_lineage_results["class"])
+    cl = [
+        SummarizedGatherResult(
+            rank="class",
+            fraction=0.1,
+            lineage=RankLineageInfo(lineage_str="a;b;c"),
+            f_weighted_at_rank=0.2,
+            bp_match_at_rank=20,
+            query_ani_at_rank=approx(0.928, rel=1e-2),
+        ),
+        SummarizedGatherResult(
+            rank="class",
+            fraction=0.9,
+            lineage=RankLineageInfo(),
+            f_weighted_at_rank=0.8,
+            bp_match_at_rank=80,
+            query_ani_at_rank=None,
+        ),
+    ]
+    assert q_res.summarized_lineage_results["class"] == cl
+
+    assert q_res.total_f_weighted["phylum"] == approx(0.2)
+    assert q_res.total_f_classified["class"] == approx(0.1)
+    assert q_res.total_bp_classified["superkingdom"] == 20


 def test_QueryTaxResult_build_summarized_result_over100percent():
     "summarize up ranks: different values"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB','f_unique_to_query': 0.95}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{}, {"name": "gB", "f_unique_to_query": 0.95}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     # now summarize up the ranks
     assert len(q_res.raw_taxresults) == 2
     with pytest.raises(ValueError) as exc:
         q_res.build_summarized_result()
     print(str(exc))
-    assert "Summarized fraction is > 100% of the query! This should not be possible" in str(exc)
+    assert (
+        "Summarized fraction is > 100% of the query! This should not be possible"
+        in str(exc)
+    )


 def test_build_summarized_result_rank_fail_not_available_resummarize():
     "build classification result"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
-    q_res.summarize_up_ranks('superkingdom')
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
+    q_res.summarize_up_ranks("superkingdom")
     with pytest.raises(ValueError) as exc:
-        q_res.build_summarized_result(single_rank='order')
+        q_res.build_summarized_result(single_rank="order")
     print(str(exc))
     assert "Error: rank 'order' not in summarized rank(s), superkingdom" in str(exc)
@@ -2386,15 +3349,31 @@ def test_aggregate_by_lineage_at_rank():
     gB_tax = ("gB", "a;c")
     taxD = make_mini_taxonomy([gA_tax, gB_tax])
     # make gather results
-    gather_results = [{'query_name': 'queryA', 'name': 'gA', 'f_unique_weighted': 0.5,'f_unique_to_query': 0.4,'unique_intersect_bp': 50},
-                      {'query_name': 'queryA', "name": 'gB', 'f_unique_weighted': 0.3,'f_unique_to_query': 0.3,'unique_intersect_bp': 30}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True)
-    summarized, all_queries = aggregate_by_lineage_at_rank([q_res], rank='phylum', by_query=False)
+    gather_results = [
+        {
+            "query_name": "queryA",
+            "name": "gA",
+            "f_unique_weighted": 0.5,
+            "f_unique_to_query": 0.4,
+            "unique_intersect_bp": 50,
+        },
+        {
+            "query_name": "queryA",
+            "name": "gB",
+            "f_unique_weighted": 0.3,
+            "f_unique_to_query": 0.3,
+            "unique_intersect_bp": 30,
+        },
+    ]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True, summarize=True
+    )
+    summarized, all_queries = aggregate_by_lineage_at_rank(
+        [q_res], rank="phylum", by_query=False
+    )
     print(summarized)
-    assert summarized == {'a;b': 0.4,
-                          'a;c': 0.3,
-                          'unclassified': approx(0.3, rel=1e-2)}
-    assert all_queries == ['queryA']
+    assert summarized == {"a;b": 0.4, "a;c": 0.3, "unclassified": approx(0.3, rel=1e-2)}
+    assert all_queries == ["queryA"]


 def test_aggregate_by_lineage_at_rank_not_available():
@@ -2404,11 +3383,27 @@ def test_aggregate_by_lineage_at_rank_not_available():
     gB_tax = ("gB", "a;c")
     taxD = make_mini_taxonomy([gA_tax, gB_tax])
     # make gather results
-    gather_results = [{'query_name': 'queryA', 'name': 'gA', 'f_unique_weighted': 0.5,'f_unique_to_query': 0.4,'unique_intersect_bp': 50},
-                      {'query_name': 'queryA', "name": 'gB', 'f_unique_weighted': 0.3,'f_unique_to_query': 0.3,'unique_intersect_bp': 30}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True)
+    gather_results = [
+        {
+            "query_name": "queryA",
+            "name": "gA",
+            "f_unique_weighted": 0.5,
+            "f_unique_to_query": 0.4,
+            "unique_intersect_bp": 50,
+        },
+        {
+            "query_name": "queryA",
+            "name": "gB",
+            "f_unique_weighted": 0.3,
+            "f_unique_to_query": 0.3,
+            "unique_intersect_bp": 30,
+        },
+    ]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True, summarize=True
+    )
     with pytest.raises(ValueError) as exc:
-        aggregate_by_lineage_at_rank([q_res], rank='species', by_query=False)
+        aggregate_by_lineage_at_rank([q_res], rank="species", by_query=False)
     print(str(exc))
     assert "Error: rank 'species' not available for aggregation." in str(exc)
@@ -2420,49 +3415,85 @@ def test_aggregate_by_lineage_at_rank_by_query():
     gB_tax = ("gB", "a;c")
     taxD = make_mini_taxonomy([gA_tax, gB_tax])
     # make gather results
-    gather_results = [{'query_name': 'queryA', 'name': 'gA', 'f_unique_weighted': 0.2,'f_unique_to_query': 0.2,'unique_intersect_bp': 50},
-                      {'query_name': 'queryA', "name": 'gB', 'f_unique_weighted': 0.3,'f_unique_to_query': 0.3,'unique_intersect_bp': 30},
-                      {'query_name': 'queryB', "name": 'gB', 'f_unique_weighted': 0.4,'f_unique_to_query': 0.4,'unique_intersect_bp': 30}]
+    gather_results = [
+        {
+            "query_name": "queryA",
+            "name": "gA",
+            "f_unique_weighted": 0.2,
+            "f_unique_to_query": 0.2,
+            "unique_intersect_bp": 50,
+        },
+        {
+            "query_name": "queryA",
+            "name": "gB",
+            "f_unique_weighted": 0.3,
+            "f_unique_to_query": 0.3,
+            "unique_intersect_bp": 30,
+        },
+        {
+            "query_name": "queryB",
+            "name": "gB",
+            "f_unique_weighted": 0.4,
+            "f_unique_to_query": 0.4,
+            "unique_intersect_bp": 30,
+        },
+    ]
     gres = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, summarize=True)
     # check by query
-    summarized, all_queries = aggregate_by_lineage_at_rank(gres.values(), rank='superkingdom', by_query=True)
+    summarized, all_queries = aggregate_by_lineage_at_rank(
+        gres.values(), rank="superkingdom", by_query=True
+    )
     print(summarized)
-    assert summarized == {"a": {'queryA': 0.5, 'queryB': 0.4},
-                          "unclassified": {'queryA': 0.5, 'queryB': 0.6}}
-    #assert summarized == {'a': {'queryA': approx(0.1, rel=1e-2), 'queryB': 0.7}}
-    assert all_queries == ['queryA', 'queryB']
-    summarized, all_queries = aggregate_by_lineage_at_rank(gres.values(), rank='phylum', by_query=True)
+    assert summarized == {
+        "a": {"queryA": 0.5, "queryB": 0.4},
+        "unclassified": {"queryA": 0.5, "queryB": 0.6},
+    }
+    # assert summarized == {'a': {'queryA': approx(0.1, rel=1e-2), 'queryB': 0.7}}
+    assert all_queries == ["queryA", "queryB"]
+    summarized, all_queries = aggregate_by_lineage_at_rank(
+        gres.values(), rank="phylum", by_query=True
+    )
     print(summarized)
-    assert summarized == {'a;c': {'queryA': 0.3, 'queryB': 0.4},
-                          'a;b': {'queryA': 0.2},
-                          "unclassified": {'queryA': 0.5, 'queryB': 0.6}}
-
+    assert summarized == {
+        "a;c": {"queryA": 0.3, "queryB": 0.4},
+        "a;b": {"queryA": 0.2},
+        "unclassified": {"queryA": 0.5, "queryB": 0.6},
+    }
+

 def test_build_classification_result_containment_threshold_fail():
     "classification result: improper containment threshold"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     with pytest.raises(ValueError) as exc:
         q_res.build_classification_result(containment_threshold=1.2)
     print(str(exc))
-    assert "Containment threshold must be between 0 and 1 (input value: 1.2)." in str(exc)
+    assert "Containment threshold must be between 0 and 1 (input value: 1.2)." in str(
+        exc
+    )
     with pytest.raises(ValueError) as exc:
-        q_res.build_classification_result(containment_threshold=-.1)
+        q_res.build_classification_result(containment_threshold=-0.1)
     print(str(exc))
-    assert "Containment threshold must be between 0 and 1 (input value: -0.1)." in str(exc)
+    assert "Containment threshold must be between 0 and 1 (input value: -0.1)." in str(
+        exc
+    )


 def test_build_classification_result_containment_threshold():
     "basic functionality: build classification result using containment threshold"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     q_res.build_classification_result(containment_threshold=0.1)
     print("classif: ", q_res.classification_result)
-    assert q_res.classification_result.status == 'match'
-    assert q_res.classification_result.rank == 'class'
+    assert q_res.classification_result.status == "match"
+    assert q_res.classification_result.rank == "class"
     assert q_res.classification_result.fraction == 0.1
     assert q_res.classification_result.lineage == RankLineageInfo(lineage_str="a;b;c")
     assert q_res.classification_result.f_weighted_at_rank == 0.2
@@ -2471,8 +3502,8 @@ def test_build_classification_result_containment_threshold():
     q_res.build_classification_result(containment_threshold=0.2)
     print("classif: ", q_res.classification_result)
-    assert q_res.classification_result.status == 'match'
-    assert q_res.classification_result.rank == 'phylum'
+    assert q_res.classification_result.status == "match"
+    assert q_res.classification_result.rank == "phylum"
     assert q_res.classification_result.lineage == RankLineageInfo(lineage_str="a;b")
     assert q_res.classification_result.f_weighted_at_rank == 0.4
     assert q_res.classification_result.fraction == 0.2
@@ -2481,8 +3512,8 @@ def test_build_classification_result_containment_threshold():
     q_res.build_classification_result(containment_threshold=1.0)
     print("classif: ", q_res.classification_result)
-    assert q_res.classification_result.status == 'below_threshold'
-    assert q_res.classification_result.rank == 'superkingdom'
+    assert q_res.classification_result.status == "below_threshold"
+    assert q_res.classification_result.rank == "superkingdom"
     assert q_res.classification_result.fraction == 0.2
    assert q_res.classification_result.lineage == RankLineageInfo(lineage_str="a")
     assert q_res.classification_result.f_weighted_at_rank == 0.4
@@ -2493,23 +3524,25 @@ def test_build_classification_result_containment_threshold():

 def test_build_classification_result_ani_threshold():
     "basic functionality: build classification result"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )

-    q_res.build_classification_result(ani_threshold=.92)
+    q_res.build_classification_result(ani_threshold=0.92)
     print("classif: ", q_res.classification_result)
-    assert q_res.classification_result.status == 'match'
-    assert q_res.classification_result.rank == 'class'
+    assert q_res.classification_result.status == "match"
+    assert q_res.classification_result.rank == "class"
     assert q_res.classification_result.fraction == 0.1
     assert q_res.classification_result.lineage == RankLineageInfo(lineage_str="a;b;c")
     assert q_res.classification_result.f_weighted_at_rank == 0.2
     assert q_res.classification_result.bp_match_at_rank == 20
     assert q_res.classification_result.query_ani_at_rank == approx(0.928, rel=1e-2)

-    q_res.build_classification_result(ani_threshold=0.94) # should classify at phylum
+    q_res.build_classification_result(ani_threshold=0.94)  # should classify at phylum
     print("classif: ", q_res.classification_result)
-    assert q_res.classification_result.status == 'match'
-    assert q_res.classification_result.rank == 'phylum'
+    assert q_res.classification_result.status == "match"
+    assert q_res.classification_result.rank == "phylum"
     assert q_res.classification_result.fraction == 0.2
     assert q_res.classification_result.lineage == RankLineageInfo(lineage_str="a;b")
     assert q_res.classification_result.f_weighted_at_rank == 0.4
@@ -2519,8 +3552,8 @@ def test_build_classification_result_ani_threshold():
     # superk result, but doesn't meet ANI threshold
     q_res.build_classification_result(ani_threshold=0.96)
     print("classif: ", q_res.classification_result)
-    assert q_res.classification_result.status == 'below_threshold'
-    assert q_res.classification_result.rank == 'superkingdom'
+    assert q_res.classification_result.status == "below_threshold"
+    assert q_res.classification_result.rank == "superkingdom"
     assert q_res.classification_result.fraction == 0.2
     assert q_res.classification_result.lineage == RankLineageInfo(lineage_str="a")
     assert q_res.classification_result.f_weighted_at_rank == 0.4
@@ -2531,14 +3564,16 @@ def test_build_classification_result_ani_threshold():

 def test_build_classification_result_ani_threshold_fail():
     "classification result: improper ANI threshold"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     with pytest.raises(ValueError) as exc:
         q_res.build_classification_result(ani_threshold=1.2)
     print(str(exc))
     assert "ANI threshold must be between 0 and 1 (input value: 1.2)." in str(exc)
     with pytest.raises(ValueError) as exc:
-        q_res.build_classification_result(ani_threshold=-.1)
+        q_res.build_classification_result(ani_threshold=-0.1)
     print(str(exc))
     assert "ANI threshold must be between 0 and 1 (input value: -0.1)." in str(exc)
@@ -2546,22 +3581,28 @@ def test_build_classification_result_ani_threshold_fail():

 def test_build_classification_result_rank_fail_not_filled():
     "classification result: rank not available (wasn't filled in tax lineage matches)"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     with pytest.raises(ValueError) as exc:
-        q_res.build_classification_result(rank='order')
+        q_res.build_classification_result(rank="order")
     print(str(exc))
-    assert "Error: rank 'order' was not available for any matching lineages." in str(exc)
+    assert "Error: rank 'order' was not available for any matching lineages." in str(
+        exc
+    )


 def test_build_classification_result_rank_fail_not_available_resummarize():
     "classification result: rank not available (wasn't summarized)"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
-    q_res.summarize_up_ranks('superkingdom')
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
+    q_res.summarize_up_ranks("superkingdom")
     with pytest.raises(ValueError) as exc:
-        q_res.build_classification_result(rank='order')
+        q_res.build_classification_result(rank="order")
     print(str(exc))
     assert "Error: rank 'order' not in summarized rank(s), superkingdom" in str(exc)
@@ -2569,33 +3610,40 @@ def test_build_classification_result_rank_fail_not_available_resummarize():

 def test_build_classification_result_rank_fail_not_available():
     "classification result: rank not available"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     with pytest.raises(ValueError) as exc:
-        q_res.build_classification_result(rank='NotARank')
+        q_res.build_classification_result(rank="NotARank")
     print(str(exc))
-    assert "Error: rank 'NotARank' not in available ranks (strain, species, genus, family, order, class, phylum, superkingdom)" in str(exc)
+    assert (
+        "Error: rank 'NotARank' not in available ranks (strain, species, genus, family, order, class, phylum, superkingdom)"
+        in str(exc)
+    )


 def test_build_classification_result_rank_containment_threshold():
     "classification result - rank and containment threshold (default)"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )

-    q_res.build_classification_result(rank='class')
+    q_res.build_classification_result(rank="class")
     print("classif: ", q_res.classification_result)
-    assert q_res.classification_result.status == 'match'
-    assert q_res.classification_result.rank == 'class'
+    assert q_res.classification_result.status == "match"
+    assert q_res.classification_result.rank == "class"
     assert q_res.classification_result.fraction == 0.1
     assert q_res.classification_result.lineage == RankLineageInfo(lineage_str="a;b;c")
     assert q_res.classification_result.f_weighted_at_rank == 0.2
     assert q_res.classification_result.bp_match_at_rank == 20
     assert q_res.classification_result.query_ani_at_rank == approx(0.928, rel=1e-2)

-    q_res.build_classification_result(rank='class', containment_threshold=0.4)
-    assert q_res.classification_result.status == 'below_threshold'
-    assert q_res.classification_result.rank == 'class'
+    q_res.build_classification_result(rank="class", containment_threshold=0.4)
+    assert q_res.classification_result.status == "below_threshold"
+    assert q_res.classification_result.rank == "class"
     assert q_res.classification_result.fraction == 0.1
     assert q_res.classification_result.lineage == RankLineageInfo(lineage_str="a;b;c")
     assert q_res.classification_result.f_weighted_at_rank == 0.2
@@ -2606,21 +3654,23 @@ def test_build_classification_result_rank_ani_threshold():
     "classification result with rank and ANI threshold"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
-
-    q_res.build_classification_result(rank='class', ani_threshold=0.92)
-    assert q_res.classification_result.status == 'match'
-    assert q_res.classification_result.rank == 'class'
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
+
+    q_res.build_classification_result(rank="class", ani_threshold=0.92)
+    assert q_res.classification_result.status == "match"
+    assert q_res.classification_result.rank == "class"
     assert q_res.classification_result.fraction == 0.1
     assert q_res.classification_result.lineage == RankLineageInfo(lineage_str="a;b;c")
     assert q_res.classification_result.f_weighted_at_rank == 0.2
     assert q_res.classification_result.bp_match_at_rank == 20
     assert q_res.classification_result.query_ani_at_rank == approx(0.928, rel=1e-2)

-    q_res.build_classification_result(rank='class', ani_threshold=0.95)
-    assert q_res.classification_result.status == 'below_threshold'
-    assert q_res.classification_result.rank == 'class'
+    q_res.build_classification_result(rank="class", ani_threshold=0.95)
+    assert q_res.classification_result.status == "below_threshold"
+    assert q_res.classification_result.rank == "class"
     assert q_res.classification_result.fraction == 0.1
     assert q_res.classification_result.lineage == RankLineageInfo(lineage_str="a;b;c")
     assert q_res.classification_result.f_weighted_at_rank == 0.2
@@ -2631,55 +3681,63 @@ def test_krona_classified():
     "basic functionality: build classification result using containment threshold"
     taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")])
-    gather_results = [{}, {"name": 'gB'}]
-    q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True)
+    gather_results = [{}, {"name": "gB"}]
+    q_res = make_QueryTaxResults(
+        gather_info=gather_results, taxD=taxD, single_query=True
+    )
     q_res.build_classification_result()
-    assert q_res.krona_classified == None
-    q_res.build_classification_result(rank='phylum')#, force_resummarize=True)
+    assert q_res.krona_classified is None
+    q_res.build_classification_result(rank="phylum")  # , force_resummarize=True)
     print(q_res.krona_classified)
-    assert q_res.krona_classified == (0.2, 'a', 'b')
-    assert
q_res.krona_unclassified == (0.8, 'unclassified', 'unclassified') - q_res.build_classification_result(rank='superkingdom') + assert q_res.krona_classified == (0.2, "a", "b") + assert q_res.krona_unclassified == (0.8, "unclassified", "unclassified") + q_res.build_classification_result(rank="superkingdom") print(q_res.krona_classified) - assert q_res.krona_classified == (0.2, 'a') - assert q_res.krona_unclassified == (0.8, 'unclassified') + assert q_res.krona_classified == (0.2, "a") + assert q_res.krona_unclassified == (0.8, "unclassified") # make sure this goes back to None if we reclassify without rank q_res.build_classification_result() - assert q_res.krona_classified == None - assert q_res.krona_unclassified == None + assert q_res.krona_classified is None + assert q_res.krona_unclassified is None assert q_res.krona_header == [] def test_make_krona_header_basic(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] + gather_results = [{}, {"name": "gB"}] phy_header = ["fraction", "superkingdom", "phylum"] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True) - q_res.build_classification_result(rank='phylum') + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True + ) + q_res.build_classification_result(rank="phylum") print(q_res.krona_classified) print(q_res.krona_header) assert q_res.krona_header == phy_header - hd = q_res.make_krona_header('phylum') + hd = q_res.make_krona_header("phylum") print("header: ", hd) assert hd == phy_header def test_make_krona_header_basic_1(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] + gather_results = [{}, {"name": "gB"}] class_header = ["fraction", "superkingdom", "phylum", "class"] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True) - q_res.build_classification_result(rank='class') + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True + ) + q_res.build_classification_result(rank="class") assert q_res.krona_header == class_header - hd = q_res.make_krona_header(min_rank='class') + hd = q_res.make_krona_header(min_rank="class") print("header: ", hd) assert hd == class_header def test_make_krona_header_fail(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True) + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, summarize=True + ) with pytest.raises(ValueError) as exc: q_res.make_krona_header("order") assert "Rank 'order' not present in summarized ranks." 
in str(exc.value) @@ -2690,305 +3748,740 @@ def test_make_krona_header_fail(): def test_make_human_summary(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True) - hs = q_res.make_human_summary(display_rank = "superkingdom") + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, summarize=True + ) + hs = q_res.make_human_summary(display_rank="superkingdom") print(hs) - assert hs == [{'rank': 'superkingdom', 'fraction': '0.800', 'lineage': 'unclassified', - 'f_weighted_at_rank': '60.0%', 'bp_match_at_rank': "60", 'query_ani_at_rank': '- ', - 'query_name': 'q1', 'query_md5': 'md5', 'query_filename': 'query_fn', - 'total_weighted_hashes': "0"}, - {'rank': 'superkingdom', 'fraction': '0.200', 'lineage': "a", - 'f_weighted_at_rank': '40.0%', 'bp_match_at_rank': "40", 'query_ani_at_rank': '94.9%', - 'query_name': 'q1', 'query_md5': 'md5', 'query_filename': 'query_fn', 'total_weighted_hashes': "0"}] + assert hs == [ + { + "rank": "superkingdom", + "fraction": "0.800", + "lineage": "unclassified", + "f_weighted_at_rank": "60.0%", + "bp_match_at_rank": "60", + "query_ani_at_rank": "- ", + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "superkingdom", + "fraction": "0.200", + "lineage": "a", + "f_weighted_at_rank": "40.0%", + "bp_match_at_rank": "40", + "query_ani_at_rank": "94.9%", + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + ] def test_make_human_summary_2(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True) - hs = q_res.make_human_summary(display_rank = "phylum") + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, summarize=True + ) + hs = q_res.make_human_summary(display_rank="phylum") print(hs) - assert hs == [{'rank': 'phylum', 'fraction': '0.800', 'lineage': 'unclassified', - 'f_weighted_at_rank': '60.0%', 'bp_match_at_rank': "60", 'query_ani_at_rank': '- ', - 'query_name': 'q1', 'query_md5': 'md5', 'query_filename': 'query_fn', - 'total_weighted_hashes': "0"}, - {'rank': 'phylum', 'fraction': '0.200', 'lineage': 'a;b', - 'f_weighted_at_rank': '40.0%', 'bp_match_at_rank': "40", 'query_ani_at_rank': '94.9%', - 'query_name': 'q1', 'query_md5': 'md5', 'query_filename': 'query_fn', 'total_weighted_hashes': "0"}] + assert hs == [ + { + "rank": "phylum", + "fraction": "0.800", + "lineage": "unclassified", + "f_weighted_at_rank": "60.0%", + "bp_match_at_rank": "60", + "query_ani_at_rank": "- ", + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "phylum", + "fraction": "0.200", + "lineage": "a;b", + "f_weighted_at_rank": "40.0%", + "bp_match_at_rank": "40", + "query_ani_at_rank": "94.9%", + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + ] def test_make_human_summary_classification(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, 
single_query=True, classify=True, classify_rank="superkingdom") - hs = q_res.make_human_summary(display_rank = "superkingdom", classification=True) + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, + taxD=taxD, + single_query=True, + classify=True, + classify_rank="superkingdom", + ) + hs = q_res.make_human_summary(display_rank="superkingdom", classification=True) print(hs) - assert hs == [{'rank': 'superkingdom', 'fraction': '0.200', 'lineage': 'a', - 'f_weighted_at_rank': '40.0%', 'bp_match_at_rank': "40", - 'query_ani_at_rank': '94.9%', 'status': 'match', 'query_name': 'q1', - 'query_md5': 'md5', 'query_filename': 'query_fn', 'total_weighted_hashes': "0"}] + assert hs == [ + { + "rank": "superkingdom", + "fraction": "0.200", + "lineage": "a", + "f_weighted_at_rank": "40.0%", + "bp_match_at_rank": "40", + "query_ani_at_rank": "94.9%", + "status": "match", + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + } + ] def test_make_human_summary_classification_2(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, classify=True, classify_rank="phylum") - hs = q_res.make_human_summary(display_rank = "phylum", classification=True) + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, + taxD=taxD, + single_query=True, + classify=True, + classify_rank="phylum", + ) + hs = q_res.make_human_summary(display_rank="phylum", classification=True) print(hs) - assert hs == [{'rank': 'phylum', 'fraction': '0.200', 'lineage': 'a;b', - 'f_weighted_at_rank': '40.0%', 'bp_match_at_rank': "40", - 'query_ani_at_rank': '94.9%', 'status': 'match', - 'query_name': 'q1', 'query_md5': 'md5', - 'query_filename': 'query_fn', 'total_weighted_hashes': "0"}] + assert hs == [ + { + "rank": "phylum", + "fraction": "0.200", + "lineage": "a;b", + "f_weighted_at_rank": "40.0%", + "bp_match_at_rank": "40", + "query_ani_at_rank": "94.9%", + "status": "match", + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + } + ] def test_make_full_summary(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True) + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, summarize=True + ) header, fs = q_res.make_full_summary() - assert header == ['query_name', 'rank', 'fraction', 'lineage', 'query_md5', 'query_filename', - 'f_weighted_at_rank', 'bp_match_at_rank', 'query_ani_at_rank', 'total_weighted_hashes'] + assert header == [ + "query_name", + "rank", + "fraction", + "lineage", + "query_md5", + "query_filename", + "f_weighted_at_rank", + "bp_match_at_rank", + "query_ani_at_rank", + "total_weighted_hashes", + ] print(fs) - assert fs == [{'rank': 'superkingdom', 'fraction': '0.2', 'lineage': 'a', 'f_weighted_at_rank': '0.4', - 'bp_match_at_rank': '40', 'query_ani_at_rank': approx(0.949,rel=1e-3), 'query_name': 'q1', - 'query_md5': 'md5', 'query_filename': 'query_fn', 'total_weighted_hashes': '0'}, - {'rank': 'superkingdom', 'fraction': '0.8', 'lineage': 'unclassified', 'f_weighted_at_rank': - '0.6', 'bp_match_at_rank': '60', 'query_ani_at_rank': None, - 'query_name': 
'q1', 'query_md5': 'md5', 'query_filename': 'query_fn', - 'total_weighted_hashes': '0'}, - {'rank': 'phylum', 'fraction': '0.2', 'lineage': 'a;b', 'f_weighted_at_rank': '0.4', - 'bp_match_at_rank': '40', 'query_ani_at_rank': approx(0.949,rel=1e-3), 'query_name': 'q1', - 'query_md5': 'md5', 'query_filename': 'query_fn', 'total_weighted_hashes': '0'}, - {'rank': 'phylum', 'fraction': '0.8', 'lineage': 'unclassified', 'f_weighted_at_rank': '0.6', - 'bp_match_at_rank': '60', 'query_ani_at_rank': None, 'query_name': 'q1', 'query_md5': 'md5', - 'query_filename': 'query_fn', 'total_weighted_hashes': '0'}, - {'rank': 'class', 'fraction': '0.1', 'lineage': 'a;b;c', 'f_weighted_at_rank': '0.2', - 'bp_match_at_rank': '20', 'query_ani_at_rank': approx(0.928, rel=1e-3), - 'query_name': 'q1', 'query_md5': 'md5', 'query_filename': 'query_fn', 'total_weighted_hashes': '0'}, - {'rank': 'class', 'fraction': '0.1', 'lineage': 'a;b;d','f_weighted_at_rank': '0.2', - 'bp_match_at_rank': '20', 'query_ani_at_rank': approx(0.928, rel=1e-3), 'query_name': 'q1', - 'query_md5': 'md5', 'query_filename': 'query_fn', 'total_weighted_hashes': '0'}, - {'rank': 'class', 'fraction': '0.8', 'lineage': 'unclassified', 'f_weighted_at_rank': '0.6', - 'bp_match_at_rank': '60', 'query_ani_at_rank': None, 'query_name': 'q1', 'query_md5': 'md5', - 'query_filename': 'query_fn', 'total_weighted_hashes': '0'}] - + assert fs == [ + { + "rank": "superkingdom", + "fraction": "0.2", + "lineage": "a", + "f_weighted_at_rank": "0.4", + "bp_match_at_rank": "40", + "query_ani_at_rank": approx(0.949, rel=1e-3), + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "superkingdom", + "fraction": "0.8", + "lineage": "unclassified", + "f_weighted_at_rank": "0.6", + "bp_match_at_rank": "60", + "query_ani_at_rank": None, + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "phylum", + "fraction": "0.2", + "lineage": "a;b", + "f_weighted_at_rank": "0.4", + "bp_match_at_rank": "40", + "query_ani_at_rank": approx(0.949, rel=1e-3), + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "phylum", + "fraction": "0.8", + "lineage": "unclassified", + "f_weighted_at_rank": "0.6", + "bp_match_at_rank": "60", + "query_ani_at_rank": None, + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "class", + "fraction": "0.1", + "lineage": "a;b;c", + "f_weighted_at_rank": "0.2", + "bp_match_at_rank": "20", + "query_ani_at_rank": approx(0.928, rel=1e-3), + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "class", + "fraction": "0.1", + "lineage": "a;b;d", + "f_weighted_at_rank": "0.2", + "bp_match_at_rank": "20", + "query_ani_at_rank": approx(0.928, rel=1e-3), + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "class", + "fraction": "0.8", + "lineage": "unclassified", + "f_weighted_at_rank": "0.6", + "bp_match_at_rank": "60", + "query_ani_at_rank": None, + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + ] + header, fs = q_res.make_full_summary(limit_float=True) - assert header == ['query_name', 'rank', 'fraction', 'lineage', 'query_md5', 'query_filename', - 
'f_weighted_at_rank', 'bp_match_at_rank', 'query_ani_at_rank', 'total_weighted_hashes'] + assert header == [ + "query_name", + "rank", + "fraction", + "lineage", + "query_md5", + "query_filename", + "f_weighted_at_rank", + "bp_match_at_rank", + "query_ani_at_rank", + "total_weighted_hashes", + ] print(fs) - assert fs == [{'rank': 'superkingdom', 'fraction': '0.200', 'lineage': 'a', 'f_weighted_at_rank': '0.400', - 'bp_match_at_rank': '40', 'query_ani_at_rank': "0.949", 'query_name': 'q1', - 'query_md5': 'md5', 'query_filename': 'query_fn', 'total_weighted_hashes': '0'}, - {'rank': 'superkingdom', 'fraction': '0.800', 'lineage': 'unclassified', 'f_weighted_at_rank': - '0.600', 'bp_match_at_rank': '60', 'query_ani_at_rank': None, - 'query_name': 'q1', 'query_md5': 'md5', 'query_filename': 'query_fn', - 'total_weighted_hashes': '0'}, - {'rank': 'phylum', 'fraction': '0.200', 'lineage': 'a;b', 'f_weighted_at_rank': '0.400', - 'bp_match_at_rank': '40', 'query_ani_at_rank': "0.949", 'query_name': 'q1', - 'query_md5': 'md5', 'query_filename': 'query_fn', 'total_weighted_hashes': '0'}, - {'rank': 'phylum', 'fraction': '0.800', 'lineage': 'unclassified', 'f_weighted_at_rank': '0.600', - 'bp_match_at_rank': '60', 'query_ani_at_rank': None, 'query_name': 'q1', 'query_md5': 'md5', - 'query_filename': 'query_fn', 'total_weighted_hashes': '0'}, - {'rank': 'class', 'fraction': '0.100', 'lineage': 'a;b;c', 'f_weighted_at_rank': '0.200', - 'bp_match_at_rank': '20', 'query_ani_at_rank': "0.928", - 'query_name': 'q1', 'query_md5': 'md5', 'query_filename': 'query_fn', 'total_weighted_hashes': '0'}, - {'rank': 'class', 'fraction': '0.100', 'lineage': 'a;b;d','f_weighted_at_rank': '0.200', - 'bp_match_at_rank': '20', 'query_ani_at_rank': "0.928", 'query_name': 'q1', - 'query_md5': 'md5', 'query_filename': 'query_fn', 'total_weighted_hashes': '0'}, - {'rank': 'class', 'fraction': '0.800', 'lineage': 'unclassified', 'f_weighted_at_rank': '0.600', - 'bp_match_at_rank': '60', 'query_ani_at_rank': None, 'query_name': 'q1', 'query_md5': 'md5', - 'query_filename': 'query_fn', 'total_weighted_hashes': '0'}] + assert fs == [ + { + "rank": "superkingdom", + "fraction": "0.200", + "lineage": "a", + "f_weighted_at_rank": "0.400", + "bp_match_at_rank": "40", + "query_ani_at_rank": "0.949", + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "superkingdom", + "fraction": "0.800", + "lineage": "unclassified", + "f_weighted_at_rank": "0.600", + "bp_match_at_rank": "60", + "query_ani_at_rank": None, + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "phylum", + "fraction": "0.200", + "lineage": "a;b", + "f_weighted_at_rank": "0.400", + "bp_match_at_rank": "40", + "query_ani_at_rank": "0.949", + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "phylum", + "fraction": "0.800", + "lineage": "unclassified", + "f_weighted_at_rank": "0.600", + "bp_match_at_rank": "60", + "query_ani_at_rank": None, + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "class", + "fraction": "0.100", + "lineage": "a;b;c", + "f_weighted_at_rank": "0.200", + "bp_match_at_rank": "20", + "query_ani_at_rank": "0.928", + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "class", + 
"fraction": "0.100", + "lineage": "a;b;d", + "f_weighted_at_rank": "0.200", + "bp_match_at_rank": "20", + "query_ani_at_rank": "0.928", + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + { + "rank": "class", + "fraction": "0.800", + "lineage": "unclassified", + "f_weighted_at_rank": "0.600", + "bp_match_at_rank": "60", + "query_ani_at_rank": None, + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + "total_weighted_hashes": "0", + }, + ] def test_make_full_summary_summarization_fail(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=False) + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, summarize=False + ) with pytest.raises(ValueError) as exc: q_res.make_full_summary() print(str(exc)) - assert 'not summarized yet' in str(exc) + assert "not summarized yet" in str(exc) def test_make_full_summary_classification(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, classify=True) + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, classify=True + ) header, fs = q_res.make_full_summary(classification=True) - assert header == ["query_name", "status", "rank", "fraction", "lineage", - "query_md5", "query_filename", "f_weighted_at_rank", - "bp_match_at_rank", "query_ani_at_rank"] + assert header == [ + "query_name", + "status", + "rank", + "fraction", + "lineage", + "query_md5", + "query_filename", + "f_weighted_at_rank", + "bp_match_at_rank", + "query_ani_at_rank", + ] print(fs) - assert fs == [{'rank': 'class', 'fraction': '0.1', 'lineage': 'a;b;c', 'f_weighted_at_rank': '0.2', - 'bp_match_at_rank': '20', 'query_ani_at_rank': approx(0.928, rel=1e-3), - 'status': 'match', 'query_name': 'q1', 'query_md5': 'md5', 'query_filename': 'query_fn'}] + assert fs == [ + { + "rank": "class", + "fraction": "0.1", + "lineage": "a;b;c", + "f_weighted_at_rank": "0.2", + "bp_match_at_rank": "20", + "query_ani_at_rank": approx(0.928, rel=1e-3), + "status": "match", + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + } + ] + - def test_make_full_summary_classification_limit_float(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, classify=True) + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, classify=True + ) header, fs = q_res.make_full_summary(classification=True, limit_float=True) - assert header == ["query_name", "status", "rank", "fraction", "lineage", - "query_md5", "query_filename", "f_weighted_at_rank", - "bp_match_at_rank", "query_ani_at_rank"] + assert header == [ + "query_name", + "status", + "rank", + "fraction", + "lineage", + "query_md5", + "query_filename", + "f_weighted_at_rank", + "bp_match_at_rank", + "query_ani_at_rank", + ] print(fs) - assert fs == [{'rank': 'class', 'fraction': '0.100', 'lineage': 'a;b;c', 'f_weighted_at_rank': '0.200', - 'bp_match_at_rank': '20', 'query_ani_at_rank': "0.928", - 'status': 
'match', 'query_name': 'q1', 'query_md5': 'md5', 'query_filename': 'query_fn'}] + assert fs == [ + { + "rank": "class", + "fraction": "0.100", + "lineage": "a;b;c", + "f_weighted_at_rank": "0.200", + "bp_match_at_rank": "20", + "query_ani_at_rank": "0.928", + "status": "match", + "query_name": "q1", + "query_md5": "md5", + "query_filename": "query_fn", + } + ] def test_make_full_summary_classification_fail(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True) + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, summarize=True + ) with pytest.raises(ValueError) as exc: q_res.make_full_summary(classification=True) print(str(exc)) - assert 'not classified yet' in str(exc) + assert "not classified yet" in str(exc) def test_make_kreport_results(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;c;d;e;f;g")]) - #need to go down to species to check that `num_bp_assigned` is happening correctly - gather_results = [{"total_weighted_hashes":100}, {"name": 'gB', "total_weighted_hashes":100}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True) + # need to go down to species to check that `num_bp_assigned` is happening correctly + gather_results = [ + {"total_weighted_hashes": 100}, + {"name": "gB", "total_weighted_hashes": 100}, + ] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, summarize=True + ) header, krepD = q_res.make_kreport_results() print(krepD) - assert krepD == [{'num_bp_assigned': '0', 'percent_containment': '40.00', 'num_bp_contained': '40', - 'rank_code': 'D', 'sci_name': 'a', 'ncbi_taxid': None}, - {'num_bp_assigned': '60', 'percent_containment': '60.00', 'num_bp_contained': '60', - 'sci_name': 'unclassified', 'rank_code': 'U', 'ncbi_taxid': None}, - {'num_bp_assigned': '0', 'percent_containment': '40.00', 'num_bp_contained': '40', - 'rank_code': 'P', 'sci_name': 'b', 'ncbi_taxid': None}, - {'num_bp_assigned': '0', 'percent_containment': '40.00', 'num_bp_contained': '40', - 'rank_code': 'C', 'sci_name': 'c', 'ncbi_taxid': None}, - {'num_bp_assigned': '0', 'percent_containment': '20.00', 'num_bp_contained': '20', - 'rank_code': 'O', 'sci_name': 'd', 'ncbi_taxid': None}, - {'num_bp_assigned': '0', 'percent_containment': '20.00', 'num_bp_contained': '20', - 'rank_code': 'F', 'sci_name': 'e', 'ncbi_taxid': None}, - {'num_bp_assigned': '0', 'percent_containment': '20.00', 'num_bp_contained': '20', - 'rank_code': 'G', 'sci_name': 'f', 'ncbi_taxid': None}, - {'num_bp_assigned': '20', 'percent_containment': '20.00', 'num_bp_contained': '20', - 'rank_code': 'S', 'sci_name': 'g', 'ncbi_taxid': None}] + assert krepD == [ + { + "num_bp_assigned": "0", + "percent_containment": "40.00", + "num_bp_contained": "40", + "rank_code": "D", + "sci_name": "a", + "ncbi_taxid": None, + }, + { + "num_bp_assigned": "60", + "percent_containment": "60.00", + "num_bp_contained": "60", + "sci_name": "unclassified", + "rank_code": "U", + "ncbi_taxid": None, + }, + { + "num_bp_assigned": "0", + "percent_containment": "40.00", + "num_bp_contained": "40", + "rank_code": "P", + "sci_name": "b", + "ncbi_taxid": None, + }, + { + "num_bp_assigned": "0", + "percent_containment": "40.00", + "num_bp_contained": "40", + "rank_code": "C", + "sci_name": "c", + "ncbi_taxid": None, + }, + { + 
"num_bp_assigned": "0", + "percent_containment": "20.00", + "num_bp_contained": "20", + "rank_code": "O", + "sci_name": "d", + "ncbi_taxid": None, + }, + { + "num_bp_assigned": "0", + "percent_containment": "20.00", + "num_bp_contained": "20", + "rank_code": "F", + "sci_name": "e", + "ncbi_taxid": None, + }, + { + "num_bp_assigned": "0", + "percent_containment": "20.00", + "num_bp_contained": "20", + "rank_code": "G", + "sci_name": "f", + "ncbi_taxid": None, + }, + { + "num_bp_assigned": "20", + "percent_containment": "20.00", + "num_bp_contained": "20", + "rank_code": "S", + "sci_name": "g", + "ncbi_taxid": None, + }, + ] def test_make_kreport_results_with_taxids(): - taxD = make_mini_taxonomy_with_taxids([("gA", "a;b;c", "1;2;3"), ("gB", "a;b;c;d;e;f;g", "1;2;3;4;5;6;7")]) + taxD = make_mini_taxonomy_with_taxids( + [("gA", "a;b;c", "1;2;3"), ("gB", "a;b;c;d;e;f;g", "1;2;3;4;5;6;7")] + ) print(taxD) - #need to go down to species to check that `num_bp_assigned` is happening correctly - gather_results = [{"total_weighted_hashes":100}, {"name": 'gB', "total_weighted_hashes":100}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True) + # need to go down to species to check that `num_bp_assigned` is happening correctly + gather_results = [ + {"total_weighted_hashes": 100}, + {"name": "gB", "total_weighted_hashes": 100}, + ] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, summarize=True + ) header, krepD = q_res.make_kreport_results() print(krepD) - assert krepD == [{'num_bp_assigned': '0', 'percent_containment': '40.00', 'num_bp_contained': '40', - 'rank_code': 'D', 'sci_name': 'a', 'ncbi_taxid': '1'}, - {'num_bp_assigned': '60', 'percent_containment': '60.00', 'num_bp_contained': '60', - 'sci_name': 'unclassified', 'rank_code': 'U', 'ncbi_taxid': None}, - {'num_bp_assigned': '0', 'percent_containment': '40.00', 'num_bp_contained': '40', - 'rank_code': 'P', 'sci_name': 'b', 'ncbi_taxid': '2'}, - {'num_bp_assigned': '0', 'percent_containment': '40.00', 'num_bp_contained': '40', - 'rank_code': 'C', 'sci_name': 'c', 'ncbi_taxid': '3'}, - {'num_bp_assigned': '0', 'percent_containment': '20.00', 'num_bp_contained': '20', - 'rank_code': 'O', 'sci_name': 'd', 'ncbi_taxid': '4'}, - {'num_bp_assigned': '0', 'percent_containment': '20.00', 'num_bp_contained': '20', - 'rank_code': 'F', 'sci_name': 'e', 'ncbi_taxid': '5'}, - {'num_bp_assigned': '0', 'percent_containment': '20.00', 'num_bp_contained': '20', - 'rank_code': 'G', 'sci_name': 'f', 'ncbi_taxid': '6'}, - {'num_bp_assigned': '20', 'percent_containment': '20.00', 'num_bp_contained': '20', - 'rank_code': 'S', 'sci_name': 'g', 'ncbi_taxid': '7'}] + assert krepD == [ + { + "num_bp_assigned": "0", + "percent_containment": "40.00", + "num_bp_contained": "40", + "rank_code": "D", + "sci_name": "a", + "ncbi_taxid": "1", + }, + { + "num_bp_assigned": "60", + "percent_containment": "60.00", + "num_bp_contained": "60", + "sci_name": "unclassified", + "rank_code": "U", + "ncbi_taxid": None, + }, + { + "num_bp_assigned": "0", + "percent_containment": "40.00", + "num_bp_contained": "40", + "rank_code": "P", + "sci_name": "b", + "ncbi_taxid": "2", + }, + { + "num_bp_assigned": "0", + "percent_containment": "40.00", + "num_bp_contained": "40", + "rank_code": "C", + "sci_name": "c", + "ncbi_taxid": "3", + }, + { + "num_bp_assigned": "0", + "percent_containment": "20.00", + "num_bp_contained": "20", + "rank_code": "O", + "sci_name": "d", + "ncbi_taxid": "4", + }, + { + 
"num_bp_assigned": "0", + "percent_containment": "20.00", + "num_bp_contained": "20", + "rank_code": "F", + "sci_name": "e", + "ncbi_taxid": "5", + }, + { + "num_bp_assigned": "0", + "percent_containment": "20.00", + "num_bp_contained": "20", + "rank_code": "G", + "sci_name": "f", + "ncbi_taxid": "6", + }, + { + "num_bp_assigned": "20", + "percent_containment": "20.00", + "num_bp_contained": "20", + "rank_code": "S", + "sci_name": "g", + "ncbi_taxid": "7", + }, + ] def test_make_kreport_results_fail(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=False) + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, summarize=False + ) with pytest.raises(ValueError) as exc: q_res.make_kreport_results() print(str(exc)) - assert 'not summarized yet' in str(exc) + assert "not summarized yet" in str(exc) def test_make_kreport_results_fail_pre_v450(): taxD = make_mini_taxonomy([("gA", "a;b;c"), ("gB", "a;b;d")]) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True) + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, summarize=True + ) with pytest.raises(ValueError) as exc: q_res.make_kreport_results() print(str(exc)) - assert "cannot produce 'kreport' format from gather results before sourmash v4.5.0" in str(exc) + assert ( + "cannot produce 'kreport' format from gather results before sourmash v4.5.0" + in str(exc) + ) def test_make_cami_results_with_taxids(): - taxD = make_mini_taxonomy_with_taxids([("gA", "a;b;c", "1;2;3"), ("gB", "a;b;c;d;e;f;g", "1;2;3;4;5;6;7")]) + taxD = make_mini_taxonomy_with_taxids( + [("gA", "a;b;c", "1;2;3"), ("gB", "a;b;c;d;e;f;g", "1;2;3;4;5;6;7")] + ) print(taxD) - #need to go down to species to check that `num_bp_assigned` is happening correctly - gather_results = [{"total_weighted_hashes":100}, {"name": 'gB', "total_weighted_hashes":100}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True) + # need to go down to species to check that `num_bp_assigned` is happening correctly + gather_results = [ + {"total_weighted_hashes": 100}, + {"name": "gB", "total_weighted_hashes": 100}, + ] + q_res = make_QueryTaxResults( + gather_info=gather_results, taxD=taxD, single_query=True, summarize=True + ) header, camires = q_res.make_cami_bioboxes() print(camires) - assert camires == [['1', 'superkingdom', '1', 'a', '40.00'], - ['2', 'phylum', '1|2', 'a|b', '40.00'], - ['3', 'class', '1|2|3', 'a|b|c', '40.00'], - ['4', 'order', '1|2|3|4', 'a|b|c|d', '20.00'], - ['5', 'family', '1|2|3|4|5', 'a|b|c|d|e', '20.00'], - ['6', 'genus', '1|2|3|4|5|6', 'a|b|c|d|e|f', '20.00'], - ['7', 'species', '1|2|3|4|5|6|7', 'a|b|c|d|e|f|g', '20.00']] + assert camires == [ + ["1", "superkingdom", "1", "a", "40.00"], + ["2", "phylum", "1|2", "a|b", "40.00"], + ["3", "class", "1|2|3", "a|b|c", "40.00"], + ["4", "order", "1|2|3|4", "a|b|c|d", "20.00"], + ["5", "family", "1|2|3|4|5", "a|b|c|d|e", "20.00"], + ["6", "genus", "1|2|3|4|5|6", "a|b|c|d|e|f", "20.00"], + ["7", "species", "1|2|3|4|5|6|7", "a|b|c|d|e|f|g", "20.00"], + ] def test_make_lingroup_results(): - taxD = make_mini_taxonomy([("gA", "1;0;0"), ("gB", "1;0;1"), ("gC", "1;1;0")], LIN=True) + taxD = 
make_mini_taxonomy( + [("gA", "1;0;0"), ("gB", "1;0;1"), ("gC", "1;1;0")], LIN=True + ) print(taxD) - lingroupD = {"1":"lg1", "1;0":'lg2', '1;1': "lg3"} + lingroupD = {"1": "lg1", "1;0": "lg2", "1;1": "lg3"} print(lingroupD) - gather_results = [{"total_weighted_hashes":100}, - {"name": 'gB', "total_weighted_hashes":100}, - {"name": 'gC', "total_weighted_hashes":100}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True, LIN=True) + gather_results = [ + {"total_weighted_hashes": 100}, + {"name": "gB", "total_weighted_hashes": 100}, + {"name": "gC", "total_weighted_hashes": 100}, + ] + q_res = make_QueryTaxResults( + gather_info=gather_results, + taxD=taxD, + single_query=True, + summarize=True, + LIN=True, + ) print(q_res.summarized_lineage_results) - header, lgD = q_res.make_lingroup_results(LINgroupsD = lingroupD) + header, lgD = q_res.make_lingroup_results(LINgroupsD=lingroupD) print(header) - assert header == ['name', 'lin', 'percent_containment', 'num_bp_contained'] + assert header == ["name", "lin", "percent_containment", "num_bp_contained"] # order may change, just check that each lg entry is present in list of results - lg1 = {'percent_containment': '60.00', 'num_bp_contained': '60', - 'lin': '1', 'name': 'lg1'} - lg2 = {'percent_containment': '40.00', 'num_bp_contained': '40', - 'lin': '1;0', 'name': 'lg2'} - lg3 = {'percent_containment': '20.00', 'num_bp_contained': '20', - 'lin': '1;1', 'name': 'lg3'} + lg1 = { + "percent_containment": "60.00", + "num_bp_contained": "60", + "lin": "1", + "name": "lg1", + } + lg2 = { + "percent_containment": "40.00", + "num_bp_contained": "40", + "lin": "1;0", + "name": "lg2", + } + lg3 = { + "percent_containment": "20.00", + "num_bp_contained": "20", + "lin": "1;1", + "name": "lg3", + } assert lg1 in lgD assert lg2 in lgD assert lg3 in lgD def test_make_lingroup_results_fail_pre_v450(): - taxD = make_mini_taxonomy([("gA", "1;0;0"), ("gB", "1;0;1"), ("gC", "1;1;0")], LIN=True) - gather_results = [{}, {"name": 'gB'}] - q_res = make_QueryTaxResults(gather_info=gather_results, taxD=taxD, single_query=True, summarize=True, LIN=True) - lingroupD = {"1":"lg1", "1;0":'lg2', '1;1': "lg3"} + taxD = make_mini_taxonomy( + [("gA", "1;0;0"), ("gB", "1;0;1"), ("gC", "1;1;0")], LIN=True + ) + gather_results = [{}, {"name": "gB"}] + q_res = make_QueryTaxResults( + gather_info=gather_results, + taxD=taxD, + single_query=True, + summarize=True, + LIN=True, + ) + lingroupD = {"1": "lg1", "1;0": "lg2", "1;1": "lg3"} with pytest.raises(ValueError) as exc: q_res.make_lingroup_results(lingroupD) print(str(exc)) - assert "cannot produce 'lingroup' format from gather results before sourmash v4.5.0" in str(exc) + assert ( + "cannot produce 'lingroup' format from gather results before sourmash v4.5.0" + in str(exc) + ) def test_read_lingroups(runtmp): lg_file = runtmp.output("test.lg.csv") - with open(lg_file, 'w') as out: - out.write('lin,name\n') - out.write('1,lg1\n') - out.write('1;0,lg2\n') - out.write('1;1,lg3\n') + with open(lg_file, "w") as out: + out.write("lin,name\n") + out.write("1,lg1\n") + out.write("1;0,lg2\n") + out.write("1;1,lg3\n") lgD = read_lingroups(lg_file) - assert lgD == {"1":"lg1", "1;0":'lg2', '1;1': "lg3"} + assert lgD == {"1": "lg1", "1;0": "lg2", "1;1": "lg3"} + def test_read_lingroups_empty_file(runtmp): lg_file = runtmp.output("test.lg.csv") - with open(lg_file, 'w') as out: + with open(lg_file, "w") as out: out.write("") with pytest.raises(ValueError) as exc: read_lingroups(lg_file) @@ 
-2998,8 +4491,8 @@ def test_read_lingroups_empty_file(runtmp): def test_read_lingroups_only_header(runtmp): lg_file = runtmp.output("test.lg.csv") - with open(lg_file, 'w') as out: - out.write('lin,name\n') + with open(lg_file, "w") as out: + out.write("lin,name\n") with pytest.raises(ValueError) as exc: read_lingroups(lg_file) print(str(exc)) @@ -3008,8 +4501,8 @@ def test_read_lingroups_only_header(runtmp): def test_read_lingroups_bad_header(runtmp): lg_file = runtmp.output("test.lg.csv") - with open(lg_file, 'w') as out: - out.write('LINgroup_pfx,LINgroup_nm\n') + with open(lg_file, "w") as out: + out.write("LINgroup_pfx,LINgroup_nm\n") with pytest.raises(ValueError) as exc: read_lingroups(lg_file) print(str(exc)) @@ -3021,8 +4514,10 @@ def test_LineageTree_init(): lin1 = RankLineageInfo(lineage_str=x) print(lin1) tree = LineageTree([lin1]) - assert tree.tree == { LineagePair('superkingdom', 'a'): - { LineagePair('phylum', 'b') : {}} } + assert tree.tree == { + LineagePair("superkingdom", "a"): {LineagePair("phylum", "b"): {}} + } + def test_LineageTree_init_mult(): x = "a;b" @@ -3031,10 +4526,14 @@ def test_LineageTree_init_mult(): lin2 = RankLineageInfo(lineage_str=y) print(lin1) from sourmash.tax.tax_utils import LineageTree + tree = LineageTree([lin1, lin2]) - assert tree.tree == {LineagePair(rank='superkingdom', name='a', taxid=None): - {LineagePair(rank='phylum', name='b', taxid=None): {}, - LineagePair(rank='phylum', name='c', taxid=None): {}}} + assert tree.tree == { + LineagePair(rank="superkingdom", name="a", taxid=None): { + LineagePair(rank="phylum", name="b", taxid=None): {}, + LineagePair(rank="phylum", name="c", taxid=None): {}, + } + } def test_LineageTree_init_and_add_lineage(): @@ -3044,13 +4543,18 @@ def test_LineageTree_init_and_add_lineage(): lin2 = RankLineageInfo(lineage_str=y) print(lin1) from sourmash.tax.tax_utils import LineageTree + tree = LineageTree([lin1]) - assert tree.tree == { LineagePair('superkingdom', 'a'): - { LineagePair('phylum', 'b') : {}} } + assert tree.tree == { + LineagePair("superkingdom", "a"): {LineagePair("phylum", "b"): {}} + } tree.add_lineage(lin2) - assert tree.tree == {LineagePair(rank='superkingdom', name='a', taxid=None): - {LineagePair(rank='phylum', name='b', taxid=None): {}, - LineagePair(rank='phylum', name='c', taxid=None): {}}} + assert tree.tree == { + LineagePair(rank="superkingdom", name="a", taxid=None): { + LineagePair(rank="phylum", name="b", taxid=None): {}, + LineagePair(rank="phylum", name="c", taxid=None): {}, + } + } def test_LineageTree_init_and_add_lineages(): @@ -3060,13 +4564,18 @@ def test_LineageTree_init_and_add_lineages(): lin2 = RankLineageInfo(lineage_str=y) print(lin1) from sourmash.tax.tax_utils import LineageTree + tree = LineageTree([lin1]) - assert tree.tree == { LineagePair('superkingdom', 'a'): - { LineagePair('phylum', 'b') : {}} } + assert tree.tree == { + LineagePair("superkingdom", "a"): {LineagePair("phylum", "b"): {}} + } tree.add_lineages([lin2]) - assert tree.tree == {LineagePair(rank='superkingdom', name='a', taxid=None): - {LineagePair(rank='phylum', name='b', taxid=None): {}, - LineagePair(rank='phylum', name='c', taxid=None): {}}} + assert tree.tree == { + LineagePair(rank="superkingdom", name="a", taxid=None): { + LineagePair(rank="phylum", name="b", taxid=None): {}, + LineagePair(rank="phylum", name="c", taxid=None): {}, + } + } def test_build_tree_RankLineageInfo(): @@ -3074,8 +4583,9 @@ def test_build_tree_RankLineageInfo(): lin1 = RankLineageInfo(lineage_str=x) print(lin1) tree = 
LineageTree([lin1]) - assert tree.tree == { LineagePair('superkingdom', 'a'): - { LineagePair('phylum', 'b') : {}} } + assert tree.tree == { + LineagePair("superkingdom", "a"): {LineagePair("phylum", "b"): {}} + } def test_build_tree_LINLineageInfo(): @@ -3083,8 +4593,7 @@ def test_build_tree_LINLineageInfo(): lin1 = LINLineageInfo(lineage_str=x) print(lin1) tree = LineageTree([lin1]) - assert tree.tree == { LineagePair('0', '0'): - { LineagePair('1', '3') : {}} } + assert tree.tree == {LineagePair("0", "0"): {LineagePair("1", "3"): {}}} def test_build_tree_2(): @@ -3094,68 +4603,96 @@ def test_build_tree_2(): lin2 = RankLineageInfo(lineage_str=y) print(lin1) print(lin2) - tree = LineageTree([lin1,lin2]) + tree = LineageTree([lin1, lin2]) - assert tree.tree == { LineagePair('superkingdom', 'a'): { LineagePair('phylum', 'b') : {}, - LineagePair('phylum', 'c') : {}} } + assert tree.tree == { + LineagePair("superkingdom", "a"): { + LineagePair("phylum", "b"): {}, + LineagePair("phylum", "c"): {}, + } + } def test_build_tree_2_LineagePairs(): # build tree from LineagePairs - tree = LineageTree([[LineagePair('superkingdom', 'a'), LineagePair('phylum', 'b')], - [LineagePair('superkingdom', 'a'), LineagePair('phylum', 'c')], - ]) + tree = LineageTree( + [ + [LineagePair("superkingdom", "a"), LineagePair("phylum", "b")], + [LineagePair("superkingdom", "a"), LineagePair("phylum", "c")], + ] + ) - assert tree.tree == { LineagePair('superkingdom', 'a'): { LineagePair('phylum', 'b') : {}, - LineagePair('phylum', 'c') : {}} } + assert tree.tree == { + LineagePair("superkingdom", "a"): { + LineagePair("phylum", "b"): {}, + LineagePair("phylum", "c"): {}, + } + } def test_build_tree_3(): # empty phylum name - x='a;' + x = "a;" lin1 = RankLineageInfo(lineage_str=x) tree = LineageTree([lin1]) - assert tree.tree == { LineagePair('superkingdom', 'a'): {} } + assert tree.tree == {LineagePair("superkingdom", "a"): {}} def test_build_tree_3_LineagePairs(): # empty phylum name: LineagePair input - lin1 = (LineagePair('superkingdom', "a", '3'), - LineagePair('phylum', '', ''),) + lin1 = ( + LineagePair("superkingdom", "a", "3"), + LineagePair("phylum", "", ""), + ) tree = LineageTree([lin1]) - assert tree.tree == { LineagePair('superkingdom', 'a', '3'): {} } + assert tree.tree == {LineagePair("superkingdom", "a", "3"): {}} def test_build_tree_5(): with pytest.raises(ValueError): - tree = LineageTree([]) + LineageTree([]) def test_build_tree_5b(): with pytest.raises(ValueError): - tree = LineageTree("") + LineageTree("") def test_build_tree_iterable(): with pytest.raises(ValueError) as exc: - tree = LineageTree(RankLineageInfo()) - assert "Must pass in an iterable containing LineagePair or LineageInfo objects" in str(exc) + LineageTree(RankLineageInfo()) + assert ( + "Must pass in an iterable containing LineagePair or LineageInfo objects" + in str(exc) + ) def test_find_lca(): - x='a;b' + x = "a;b" lin1 = RankLineageInfo(lineage_str=x) tree = LineageTree([lin1]) lca = tree.find_lca() - assert lca == ((LineagePair('superkingdom', 'a'), LineagePair('phylum', 'b'),), 0) + assert lca == ( + ( + LineagePair("superkingdom", "a"), + LineagePair("phylum", "b"), + ), + 0, + ) def test_find_lca_LineagePairs(): - tree = LineageTree([[LineagePair('rank1', 'name1'), LineagePair('rank2', 'name2')]]) + tree = LineageTree([[LineagePair("rank1", "name1"), LineagePair("rank2", "name2")]]) lca = tree.find_lca() - assert lca == ((LineagePair('rank1', 'name1'), LineagePair('rank2', 'name2'),), 0) + assert lca == ( + ( + 
LineagePair("rank1", "name1"), + LineagePair("rank2", "name2"), + ), + 0, + ) def test_find_lca_2(): @@ -3167,7 +4704,7 @@ def test_find_lca_2(): tree = LineageTree([lin1, lin2]) lca = tree.find_lca() - assert lca == ((LineagePair('superkingdom', 'a'),), 2) + assert lca == ((LineagePair("superkingdom", "a"),), 2) def test_find_lca_LIN(): @@ -3179,17 +4716,20 @@ def test_find_lca_LIN(): tree = LineageTree([lin1, lin2]) lca = tree.find_lca() - assert lca == ((LineagePair('0', '5'),), 2) + assert lca == ((LineagePair("0", "5"),), 2) print(lca) def test_find_lca_2_LineagePairs(): - tree = LineageTree([[LineagePair('rank1', 'name1'), LineagePair('rank2', 'name2a')], - [LineagePair('rank1', 'name1'), LineagePair('rank2', 'name2b')], - ]) + tree = LineageTree( + [ + [LineagePair("rank1", "name1"), LineagePair("rank2", "name2a")], + [LineagePair("rank1", "name1"), LineagePair("rank2", "name2b")], + ] + ) lca = tree.find_lca() - assert lca == ((LineagePair('rank1', 'name1'),), 2) + assert lca == ((LineagePair("rank1", "name1"),), 2) def test_find_lca_3(): @@ -3198,7 +4738,7 @@ def test_find_lca_3(): tree = LineageTree([lin1, lin2]) lca, reason = tree.find_lca() - assert lca == lin1.filled_lineage # find most specific leaf node + assert lca == lin1.filled_lineage # find most specific leaf node print(lca) @@ -3214,12 +4754,17 @@ def test_build_tree_with_initial(): lca = tree.find_lca() print(lca) - assert lca == ((LineagePair(rank='superkingdom', name='a', taxid=None), - LineagePair(rank='phylum', name='b', taxid=None)), 2) + assert lca == ( + ( + LineagePair(rank="superkingdom", name="a", taxid=None), + LineagePair(rank="phylum", name="b", taxid=None), + ), + 2, + ) tree.add_lineages([lin3]) lca2 = tree.find_lca() print(lca2) - assert lca2 == ((LineagePair('superkingdom', 'a'),), 2) + assert lca2 == ((LineagePair("superkingdom", "a"),), 2) def test_LineageTree_find_ordered_paths(): @@ -3234,14 +4779,22 @@ def test_LineageTree_find_ordered_paths(): paths = tree.ordered_paths() print(paths) - assert paths == [(LineagePair(rank='superkingdom', name='a', taxid=None), - LineagePair(rank='phylum', name='e', taxid=None)), - (LineagePair(rank='superkingdom', name='a', taxid=None), - LineagePair(rank='phylum', name='b', taxid=None), - LineagePair(rank='class', name='c', taxid=None)), - (LineagePair(rank='superkingdom', name='a', taxid=None), - LineagePair(rank='phylum', name='b', taxid=None), - LineagePair(rank='class', name='d', taxid=None))] + assert paths == [ + ( + LineagePair(rank="superkingdom", name="a", taxid=None), + LineagePair(rank="phylum", name="e", taxid=None), + ), + ( + LineagePair(rank="superkingdom", name="a", taxid=None), + LineagePair(rank="phylum", name="b", taxid=None), + LineagePair(rank="class", name="c", taxid=None), + ), + ( + LineagePair(rank="superkingdom", name="a", taxid=None), + LineagePair(rank="phylum", name="b", taxid=None), + LineagePair(rank="class", name="d", taxid=None), + ), + ] def test_LineageTree_find_ordered_paths_include_internal(): @@ -3257,14 +4810,24 @@ def test_LineageTree_find_ordered_paths_include_internal(): print(paths) - assert paths == [(LineagePair(rank='superkingdom', name='a', taxid=None),), - (LineagePair(rank='superkingdom', name='a', taxid=None), - LineagePair(rank='phylum', name='e', taxid=None)), - (LineagePair(rank='superkingdom', name='a', taxid=None), - LineagePair(rank='phylum', name='b', taxid=None)), - (LineagePair(rank='superkingdom', name='a', taxid=None), - LineagePair(rank='phylum', name='b', taxid=None), - LineagePair(rank='class', 
name='c', taxid=None)), - (LineagePair(rank='superkingdom', name='a', taxid=None), - LineagePair(rank='phylum', name='b', taxid=None), - LineagePair(rank='class', name='d', taxid=None))] + assert paths == [ + (LineagePair(rank="superkingdom", name="a", taxid=None),), + ( + LineagePair(rank="superkingdom", name="a", taxid=None), + LineagePair(rank="phylum", name="e", taxid=None), + ), + ( + LineagePair(rank="superkingdom", name="a", taxid=None), + LineagePair(rank="phylum", name="b", taxid=None), + ), + ( + LineagePair(rank="superkingdom", name="a", taxid=None), + LineagePair(rank="phylum", name="b", taxid=None), + LineagePair(rank="class", name="c", taxid=None), + ), + ( + LineagePair(rank="superkingdom", name="a", taxid=None), + LineagePair(rank="phylum", name="b", taxid=None), + LineagePair(rank="class", name="d", taxid=None), + ), + ] diff --git a/tests/test_test_framework.py b/tests/test_test_framework.py index abf7e2c93a..85bb3e1020 100644 --- a/tests/test_test_framework.py +++ b/tests/test_test_framework.py @@ -5,4 +5,4 @@ def test_failed_sourmash_exception(runtmp): with pytest.raises(SourmashCommandFailed): - runtmp.sourmash('') + runtmp.sourmash("") diff --git a/tox.ini b/tox.ini index 0e5602628c..1806e48778 100644 --- a/tox.ini +++ b/tox.ini @@ -1,28 +1,34 @@ [tox] -env_list = - py311, - py312, - py310, - coverage, - docs, - package_description - fix_lint, - hypothesis, - khmer, - khmer_master -min_version = 3.27 isolated_build = true skip_missing_interpreters = true +env_list = + py311, + py312, + py310, + coverage, + docs, + package_description + fix_lint, + hypothesis, + khmer, + khmer_master +min_version = 3.27 [testenv] description = run the tests with pytest under {basepython} +deps = + pip>=19.3.1 +extras = + storage + test +commands = + pytest \ + --cov "{envsitepackagesdir}/sourmash" \ + --cov-config "{toxinidir}/tox.ini" \ + --cov-report= \ + --junitxml {toxworkdir}/junit.{envname}.xml \ + {posargs:doc tests} package = wheel -wheel_build_env = .pkg -set_env = - PIP_DISABLE_VERSION_CHECK = 1 - COVERAGE_FILE = {env:COVERAGE_FILE:{toxworkdir}/.coverage.{envname}} - VIRTUALENV_NO_DOWNLOAD = 1 - PIP_EXTRA_INDEX_URL = https://antocuni.github.io/pypy-wheels/manylinux2010 pass_env = TOXENV CURL_CA_BUNDLE @@ -38,140 +44,160 @@ pass_env = PYTHONTRACEMALLOC LIBCLANG_PATH BINDGEN_EXTRA_CLANG_ARGS -deps = - pip >= 19.3.1 -extras = - test - storage -commands = pytest \ - --cov "{envsitepackagesdir}/sourmash" \ - --cov-config "{toxinidir}/tox.ini" \ - --cov-report= \ - --junitxml {toxworkdir}/junit.{envname}.xml \ - {posargs:doc tests} + NIX_LD +set_env = + PIP_DISABLE_VERSION_CHECK = 1 + COVERAGE_FILE = {env:COVERAGE_FILE:{toxworkdir}/.coverage.{envname}} + VIRTUALENV_NO_DOWNLOAD = 1 + PIP_EXTRA_INDEX_URL = https://antocuni.github.io/pypy-wheels/manylinux2010 +wheel_build_env = .pkg [testenv:.pkg] pass_env = - LIBCLANG_PATH - BINDGEN_EXTRA_CLANG_ARGS + LIBCLANG_PATH + BINDGEN_EXTRA_CLANG_ARGS [testenv:pypy3] deps = - pip >= 19.3.1 - psutil <= 5.6.7 + pip>=19.3.1 + psutil<=5.6.7 [testenv:hypothesis] -commands = pytest \ - --cov "{envsitepackagesdir}/sourmash" \ - --cov-config "{toxinidir}/tox.ini" \ - --cov-report= \ - --junitxml {toxworkdir}/junit.{envname}.xml \ - --run-hypothesis \ - --hypothesis-show-statistics \ - --hypothesis-profile ci \ - {posargs:.} +commands = + pytest \ + --cov "{envsitepackagesdir}/sourmash" \ + --cov-config "{toxinidir}/tox.ini" \ + --cov-report= \ + --junitxml {toxworkdir}/junit.{envname}.xml \ + --run-hypothesis \ + --hypothesis-show-statistics \ + 
--hypothesis-profile ci \ + {posargs:.} [testenv:khmer] basepython = python3.10 deps = - khmer -commands = pytest \ - --cov "{envsitepackagesdir}/sourmash" \ - --cov-config "{toxinidir}/tox.ini" \ - --cov-report= \ - --junitxml {toxworkdir}/junit.{envname}.xml \ - -k test_nodegraph \ - {posargs:.} + khmer +commands = + pytest \ + --cov "{envsitepackagesdir}/sourmash" \ + --cov-config "{toxinidir}/tox.ini" \ + --cov-report= \ + --junitxml {toxworkdir}/junit.{envname}.xml \ + -k test_nodegraph \ + {posargs:.} [testenv:khmer_master] basepython = python3.10 deps = - -e git+https://github.com/dib-lab/khmer.git\#egg=khmer -commands = pytest \ - --cov "{envsitepackagesdir}/sourmash" \ - --cov-config "{toxinidir}/tox.ini" \ - --cov-report= \ - --junitxml {toxworkdir}/junit.{envname}.xml \ - -k test_nodegraph \ - {posargs:.} + -e git+https://github.com/dib-lab/khmer.git\#egg=khmer +commands = + pytest \ + --cov "{envsitepackagesdir}/sourmash" \ + --cov-config "{toxinidir}/tox.ini" \ + --cov-report= \ + --junitxml {toxworkdir}/junit.{envname}.xml \ + -k test_nodegraph \ + {posargs:.} [testenv:asv] description = run asv for benchmarking (compare current commit with latest) deps = - asv==0.5.1 - virtualenv changedir = {toxinidir} commands = - asv machine --yes - asv continuous latest HEAD {posargs} + asv==0.5.1 + virtualenv changedir = {toxinidir} commands = + asv machine --yes + asv continuous latest HEAD {posargs} [testenv:docs] description = invoke sphinx-build to build the HTML docs basepython = python3.10 -extras = doc +extras = + doc +commands = + sphinx-build -d "{toxworkdir}/docs_doctree" doc "{toxworkdir}/docs_out" --color -bhtml {posargs} + python -c 'import pathlib; print("documentation available under file://\{0\}".format(pathlib.Path(r"{toxworkdir}") / "docs_out" / "index.html"))' allowlist_externals = pandoc -pass_env = HOME change_dir = {toxinidir} -#commands = sphinx-build -d "{toxworkdir}/docs_doctree" doc "{toxworkdir}/docs_out" --color -W -bhtml {posargs} -commands = sphinx-build -d "{toxworkdir}/docs_doctree" doc "{toxworkdir}/docs_out" --color -bhtml {posargs} - python -c 'import pathlib; print("documentation available under file://\{0\}".format(pathlib.Path(r"{toxworkdir}") / "docs_out" / "index.html"))' +pass_env = HOME [testenv:package_description] description = check that the long description is valid basepython = python3.10 -deps = twine >= 1.12.1 - # TODO installing readme-renderer[md] should not be necessary - readme-renderer[md] >= 24.0 - pip >= 19.1 skip_install = true -change_dir = {toxinidir} +deps = + pip>=19.1 + readme-renderer[md]>=24 + twine>=1.12.1 extras = +commands = + pip wheel -w {envtmpdir}/build --no-deps .
+ twine check {envtmpdir}/build/* +change_dir = {toxinidir} [testenv:mypy] description = run mypy checker basepython = python3.10 +deps = + mypy +commands = + mypy src/sourmash pass_env = {[testenv]pass_env} - # without PROGRAMDATA cloning using git for Windows will fail with an `error setting certificate verify locations` error - PROGRAMDATA -deps = mypy -commands = mypy src/sourmash + PROGRAMDATA [testenv:fix_lint] description = format the code base to adhere to our styles, and complain about what we cannot do automatically basepython = python3.10 +skip_install = true +deps = + pre-commit>=2 +extras = + lint +commands = + pre-commit run --all-files --show-diff-on-failure {posargs} + python -c 'import pathlib; print("hint: run \{\} install to add checks as pre-commit hook".format(pathlib.Path(r"{envdir}") / "bin" / "pre-commit"))' pass_env = {[testenv]pass_env} - # without PROGRAMDATA cloning using git for Windows will fail with an `error setting certificate verify locations` error - PROGRAMDATA - PRE_COMMIT_HOME -extras = lint -deps = pre-commit>=2 -skip_install = True -commands = pre-commit run --all-files --show-diff-on-failure {posargs} - python -c 'import pathlib; print("hint: run \{\} install to add checks as pre-commit hook".format(pathlib.Path(r"{envdir}") / "bin" / "pre-commit"))' + PROGRAMDATA + PRE_COMMIT_HOME [testenv:coverage] description = [run locally after tests]: combine coverage data and create report; - generates a diff coverage against origin/latest (can be changed by setting DIFF_AGAINST env var) -deps = {[testenv]deps} - coverage >= 5.0.1 - diff_cover -skip_install = True + generates a diff coverage against origin/latest (can be changed by setting DIFF_AGAINST env var) +skip_install = true +deps = + {[testenv]deps} + coverage>=5.0.1 + diff_cover +parallel_show_output = true +commands = + coverage combine + coverage report -i -m + coverage xml -i -o {toxworkdir}/coverage.xml + coverage html -i -d {toxworkdir}/htmlcov + diff-cover --compare-branch {env:DIFF_AGAINST:origin/latest} {toxworkdir}/coverage.xml +depends = py312, py311, py310, pypy3 pass_env = {[testenv]pass_env} - DIFF_AGAINST + DIFF_AGAINST set_env = COVERAGE_FILE={toxworkdir}/.coverage -commands = coverage combine - coverage report -i -m - coverage xml -i -o {toxworkdir}/coverage.xml - coverage html -i -d {toxworkdir}/htmlcov - diff-cover --compare-branch {env:DIFF_AGAINST:origin/latest} {toxworkdir}/coverage.xml -depends = py312, py311, py310, pypy3 -parallel_show_output = True [testenv:X] description = print the positional arguments passed in with echo -commands = echo {posargs} +commands = + echo {posargs} + +[testenv:dev] +description = dev environment with all deps at {envdir} +usedevelop = true +deps = + {[testenv]deps} +extras = + doc + storage + test +commands = + python -m pip list --format=columns + python -c "print(r'{envpython}')" [coverage:run] branch = true @@ -190,20 +216,20 @@ exclude_lines = [coverage:paths] source = src/sourmash/ - tests/ - */.tox/*/lib/python*/site-packages/sourmash - */.tox/pypy*/site-packages/sourmash - */.tox\*\Lib\site-packages\sourmash - */src/sourmash - *\src\sourmash - */tests - *\tests + tests/ + */.tox/*/lib/python*/site-packages/sourmash + */.tox/pypy*/site-packages/sourmash + */.tox\*\Lib\site-packages\sourmash + */src/sourmash + *\src\sourmash + */tests + *\tests [gh-actions] python = - 3.10: py310, docs, package_description, coverage - 3.11: py311, coverage - 3.12: py312, coverage + 3.10: py310, docs, package_description, coverage + 3.11: py311, coverage + 
3.12: py312, coverage [flake8] max-complexity = 22 @@ -212,14 +238,3 @@ ignore = E203, W503, C901, E402, B011 [pep8] max-line-length = 99 - -[testenv:dev] -description = dev environment with all deps at {envdir} -extras = - test - storage - doc -deps = {[testenv]deps} -usedevelop = True -commands = python -m pip list --format=columns - python -c "print(r'{envpython}')" diff --git a/utils/cardinality_estimate_confidence.py b/utils/cardinality_estimate_confidence.py index 1f8471fbeb..85c6e5cc75 100644 --- a/utils/cardinality_estimate_confidence.py +++ b/utils/cardinality_estimate_confidence.py @@ -13,7 +13,7 @@ def set_size_chernoff(set_size, scale, relative_error=0.05): @param relative_error: the desired relative error (defaults to 5%) @return: float (the upper bound probability) """ - upper_bound = 1 - 2 * np.exp(- relative_error**2*set_size/(scale * 3)) + upper_bound = 1 - 2 * np.exp(-(relative_error**2) * set_size / (scale * 3)) return upper_bound @@ -28,7 +28,9 @@ def get_set_size(scale, num_sketches): return int(np.floor(scale * num_sketches)) -def set_size_estimate_is_accurate(scale, num_sketches, relative_error=0.05, confidence=0.95): +def set_size_estimate_is_accurate( + scale, num_sketches, relative_error=0.05, confidence=0.95 +): set_size = get_set_size(scale, num_sketches) probability = set_size_chernoff(set_size, scale, relative_error) if probability >= confidence: @@ -38,48 +40,96 @@ def set_size_estimate_is_accurate(scale, num_sketches, relative_error=0.05, conf def test_set_size_chernoff(): - eps = 10**(-6) + eps = 10 ** (-6) rel_error = 0.01 set_size = 1000000 - s = 1/0.1 # I'm used to using a scale value between 0 and 1 + s = 1 / 0.1 # I'm used to using a scale value between 0 and 1 value_from_mathematica = 0.928652 - assert np.abs(set_size_chernoff(set_size, s, rel_error) - value_from_mathematica) < eps + assert ( + np.abs(set_size_chernoff(set_size, s, rel_error) - value_from_mathematica) < eps + ) rel_error = 0.05 set_size = 10000 s = 1 value_from_mathematica = 0.999519 - assert np.abs(set_size_chernoff(set_size, s, rel_error) - value_from_mathematica) < eps + assert ( + np.abs(set_size_chernoff(set_size, s, rel_error) - value_from_mathematica) < eps + ) rel_error = 0.001 set_size = 10 - s = 1/.01 + s = 1 / 0.01 value_from_mathematica = -1 - assert np.abs(set_size_chernoff(set_size, s, rel_error) - value_from_mathematica) < eps + assert ( + np.abs(set_size_chernoff(set_size, s, rel_error) - value_from_mathematica) < eps + ) def test_set_size_estimate_is_accurate(): - eps = 10 ** (-6) + 10 ** (-6) rel_error = 0.05 set_size = 1000000 s = 1 / 0.1 # I'm used to using a scale value between 0 and 1 num_sketches = set_size / s # idealized case confidence = 0.95 - assert set_size_estimate_is_accurate(scale=s, num_sketches=num_sketches, relative_error=rel_error, confidence=confidence) is True + assert ( + set_size_estimate_is_accurate( + scale=s, + num_sketches=num_sketches, + relative_error=rel_error, + confidence=confidence, + ) + is True + ) confidence = set_size_chernoff(set_size=set_size, scale=s, relative_error=rel_error) - assert set_size_estimate_is_accurate(scale=s, num_sketches=num_sketches, relative_error=rel_error, confidence=confidence) is True + assert ( + set_size_estimate_is_accurate( + scale=s, + num_sketches=num_sketches, + relative_error=rel_error, + confidence=confidence, + ) + is True + ) # Horrible values - assert set_size_estimate_is_accurate(scale=10000, num_sketches=num_sketches, relative_error=0, confidence=1) is False + assert ( + 
set_size_estimate_is_accurate( + scale=10000, num_sketches=num_sketches, relative_error=0, confidence=1 + ) + is False + ) # Less horrible, but still bad values confidence = set_size_chernoff(set_size=set_size, scale=s, relative_error=rel_error) - assert set_size_estimate_is_accurate(scale=s, num_sketches=num_sketches, relative_error=rel_error, confidence=confidence*2) is False + assert ( + set_size_estimate_is_accurate( + scale=s, + num_sketches=num_sketches, + relative_error=rel_error, + confidence=confidence * 2, + ) + is False + ) # one where the confidence is negative - rel_error = .001 + rel_error = 0.001 set_size = 10 s = 100 - num_sketches = set_size/s - assert set_size_estimate_is_accurate(scale=s, num_sketches=num_sketches, relative_error=rel_error, confidence=confidence) is False - assert set_size_estimate_is_accurate(scale=s, num_sketches=0, relative_error=rel_error, confidence=confidence) is False + num_sketches = set_size / s + assert ( + set_size_estimate_is_accurate( + scale=s, + num_sketches=num_sketches, + relative_error=rel_error, + confidence=confidence, + ) + is False + ) + assert ( + set_size_estimate_is_accurate( + scale=s, num_sketches=0, relative_error=rel_error, confidence=confidence + ) + is False + ) def run_tests(): @@ -87,7 +137,7 @@ def run_tests(): test_set_size_estimate_is_accurate() -if __name__ == '__main__': +if __name__ == "__main__": print("Running tests") run_tests() print("Tests completed successfully") diff --git a/utils/check-tree.py b/utils/check-tree.py index 12fc0190de..639e376e3b 100644 --- a/utils/check-tree.py +++ b/utils/check-tree.py @@ -7,12 +7,12 @@ import sourmash from sourmash.sbtmh import search_minhashes -THRESHOLD=0.08 +THRESHOLD = 0.08 def main(): p = argparse.ArgumentParser() - p.add_argument('sbt') + p.add_argument("sbt") args = p.parse_args() db = sourmash.sbtmh.load_sbt_index(args.sbt) @@ -21,11 +21,11 @@ def main(): for leaf in db.leaves(): query = leaf.data matches = db.find(search_minhashes, query, threshold) - matches = list([ x.data for x in matches ]) + matches = list([x.data for x in matches]) if query not in matches: print(query) assert 0 - -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/utils/compute-dna-mh-another-way.py b/utils/compute-dna-mh-another-way.py index aad7198092..c197298d75 100755 --- a/utils/compute-dna-mh-another-way.py +++ b/utils/compute-dna-mh-another-way.py @@ -7,7 +7,9 @@ The output of this is used in test_sourmash.py to verify our C++ code. """ -__complementTranslation = { "A": "T", "C": "G", "G": "C", "T": "A", "N": "N" } +__complementTranslation = {"A": "T", "C": "G", "G": "C", "T": "A", "N": "N"} + + def complement(s): """ Return complement of 's'. 
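Note on this and the following utility-script hunks: complement/reverse implement the standard reverse-complement convention. A minimal standalone sketch (illustrative only, not part of the patch; the translation table is copied verbatim from the script above):

__complementTranslation = {"A": "T", "C": "G", "G": "C", "T": "A", "N": "N"}


def complement(s):
    # map each base to its Watson-Crick complement
    return "".join(__complementTranslation[b] for b in s)


def reverse(s):
    # reverse the string
    return "".join(reversed(s))


# reverse complement of ACGTN
assert reverse(complement("ACGTN")) == "NACGT"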
@@ -26,21 +28,24 @@ def reverse(s): def kmers(seq, k): for start in range(len(seq) - k + 1): - yield seq[start:start + k] + yield seq[start : start + k] + ### K = 21 -import sys, screed +import sys +import screed import mmh3 import sourmash -print('imported sourmash:', sourmash, file=sys.stderr) + +print("imported sourmash:", sourmash, file=sys.stderr) import sourmash.signature record = next(iter(screed.open(sys.argv[1]))) -print('loaded', record.name, file=sys.stderr) -revcomp = reverse(complement((record.sequence))) +print("loaded", record.name, file=sys.stderr) +revcomp = reverse(complement(record.sequence)) mh = sourmash.MinHash(ksize=K, n=500, is_protein=False) @@ -69,5 +74,5 @@ def kmers(seq, k): mh.add_hash(hash) -s = sourmash.signature.SourmashSignature('', mh, name=record.name) +s = sourmash.signature.SourmashSignature("", mh, name=record.name) print(sourmash.signature.save_signatures([s])) diff --git a/utils/compute-input-prot-another-way.py b/utils/compute-input-prot-another-way.py index 5c1202eaee..7dec10d849 100755 --- a/utils/compute-input-prot-another-way.py +++ b/utils/compute-input-prot-another-way.py @@ -7,25 +7,77 @@ The output of this is used in test_sourmash.py to verify our C++ code. """ -dna_to_aa={'TTT':'F','TTC':'F', 'TTA':'L','TTG':'L', - 'TCT':'S','TCC':'S','TCA':'S','TCG':'S', - 'TAT':'Y','TAC':'Y', 'TAA':'*','TAG':'*','TGA':'*', - 'TGT':'C','TGC':'C', 'TGG':'W', - 'CTT':'L','CTC':'L','CTA':'L','CTG':'L', - 'CCT':'P','CCC':'P','CCA':'P','CCG':'P', - 'CAT':'H','CAC':'H', 'CAA':'Q','CAG':'Q', - 'CGT':'R','CGC':'R','CGA':'R','CGG':'R', - 'ATT':'I','ATC':'I','ATA':'I', 'ATG':'M', - 'ACT':'T','ACC':'T','ACA':'T','ACG':'T', - 'AAT':'N','AAC':'N', 'AAA':'K','AAG':'K', - 'AGT':'S','AGC':'S', 'AGA':'R','AGG':'R', - 'GTT':'V','GTC':'V','GTA':'V','GTG':'V', - 'GCT':'A','GCC':'A','GCA':'A','GCG':'A', - 'GAT':'D','GAC':'D', 'GAA':'E','GAG':'E', - 'GGT':'G','GGC':'G','GGA':'G','GGG':'G'} - - -__complementTranslation = { "A": "T", "C": "G", "G": "C", "T": "A", "N": "N" } +dna_to_aa = { + "TTT": "F", + "TTC": "F", + "TTA": "L", + "TTG": "L", + "TCT": "S", + "TCC": "S", + "TCA": "S", + "TCG": "S", + "TAT": "Y", + "TAC": "Y", + "TAA": "*", + "TAG": "*", + "TGA": "*", + "TGT": "C", + "TGC": "C", + "TGG": "W", + "CTT": "L", + "CTC": "L", + "CTA": "L", + "CTG": "L", + "CCT": "P", + "CCC": "P", + "CCA": "P", + "CCG": "P", + "CAT": "H", + "CAC": "H", + "CAA": "Q", + "CAG": "Q", + "CGT": "R", + "CGC": "R", + "CGA": "R", + "CGG": "R", + "ATT": "I", + "ATC": "I", + "ATA": "I", + "ATG": "M", + "ACT": "T", + "ACC": "T", + "ACA": "T", + "ACG": "T", + "AAT": "N", + "AAC": "N", + "AAA": "K", + "AAG": "K", + "AGT": "S", + "AGC": "S", + "AGA": "R", + "AGG": "R", + "GTT": "V", + "GTC": "V", + "GTA": "V", + "GTG": "V", + "GCT": "A", + "GCC": "A", + "GCA": "A", + "GCG": "A", + "GAT": "D", + "GAC": "D", + "GAA": "E", + "GAG": "E", + "GGT": "G", + "GGC": "G", + "GGA": "G", + "GGG": "G", +} + + +__complementTranslation = {"A": "T", "C": "G", "G": "C", "T": "A", "N": "N"} + + def complement(s): """ Return complement of 's'. 
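The dna_to_aa table above feeds a straightforward six-frame translation (three forward frames plus three on the reverse complement), exercised by the peptides/translate helpers in the hunks that follow. A short usage sketch, with a deliberately trimmed table covering only the codons in the example:

# trimmed codon table for illustration; the script carries the full 64-codon map
dna_to_aa = {"ATG": "M", "GCC": "A", "TGA": "*"}


def peptides(seq, start):
    # translate successive codons starting at `start`; unknown or partial codons become X
    for i in range(start, len(seq), 3):
        yield dna_to_aa.get(seq[i : i + 3], "X")


assert "".join(peptides("ATGGCCTGA", 0)) == "MA*"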
@@ -44,7 +96,7 @@ def reverse(s): def peptides(seq, start): for i in range(start, len(seq), 3): - yield dna_to_aa.get(seq[i:i+3], "X") + yield dna_to_aa.get(seq[i : i + 3], "X") def translate(seq): @@ -52,27 +104,31 @@ def translate(seq): pep = peptides(seq, i) yield "".join(pep) - revcomp = reverse(complement((seq))) + revcomp = reverse(complement(seq)) for i in range(3): pep = peptides(revcomp, i) yield "".join(pep) + def kmers(seq, k): for start in range(len(seq) - k + 1): - yield seq[start:start + k] + yield seq[start : start + k] + ### K = 21 -import sys, screed +import sys +import screed import mmh3 import sourmash -print('imported sourmash:', sourmash, file=sys.stderr) + +print("imported sourmash:", sourmash, file=sys.stderr) import sourmash.signature record = next(iter(screed.open(sys.argv[1]))) -print('loaded', record.name, file=sys.stderr) +print("loaded", record.name, file=sys.stderr) mh = sourmash.MinHash(ksize=K, n=500, is_protein=True) prot_ksize = int(K / 3) @@ -86,5 +142,5 @@ def kmers(seq, k): mh.add_hash(hash) -s = sourmash.signature.SourmashSignature('', mh, name=record.name) +s = sourmash.signature.SourmashSignature("", mh, name=record.name) print(sourmash.signature.save_signatures([s])) diff --git a/utils/compute-prot-mh-another-way.py b/utils/compute-prot-mh-another-way.py index 6295204f3b..e859268d05 100755 --- a/utils/compute-prot-mh-another-way.py +++ b/utils/compute-prot-mh-another-way.py @@ -7,25 +7,77 @@ The output of this is used in test_sourmash.py to verify our C++ code. """ -dna_to_aa={'TTT':'F','TTC':'F', 'TTA':'L','TTG':'L', - 'TCT':'S','TCC':'S','TCA':'S','TCG':'S', - 'TAT':'Y','TAC':'Y', 'TAA':'*','TAG':'*','TGA':'*', - 'TGT':'C','TGC':'C', 'TGG':'W', - 'CTT':'L','CTC':'L','CTA':'L','CTG':'L', - 'CCT':'P','CCC':'P','CCA':'P','CCG':'P', - 'CAT':'H','CAC':'H', 'CAA':'Q','CAG':'Q', - 'CGT':'R','CGC':'R','CGA':'R','CGG':'R', - 'ATT':'I','ATC':'I','ATA':'I', 'ATG':'M', - 'ACT':'T','ACC':'T','ACA':'T','ACG':'T', - 'AAT':'N','AAC':'N', 'AAA':'K','AAG':'K', - 'AGT':'S','AGC':'S', 'AGA':'R','AGG':'R', - 'GTT':'V','GTC':'V','GTA':'V','GTG':'V', - 'GCT':'A','GCC':'A','GCA':'A','GCG':'A', - 'GAT':'D','GAC':'D', 'GAA':'E','GAG':'E', - 'GGT':'G','GGC':'G','GGA':'G','GGG':'G'} - - -__complementTranslation = { "A": "T", "C": "G", "G": "C", "T": "A", "N": "N" } +dna_to_aa = { + "TTT": "F", + "TTC": "F", + "TTA": "L", + "TTG": "L", + "TCT": "S", + "TCC": "S", + "TCA": "S", + "TCG": "S", + "TAT": "Y", + "TAC": "Y", + "TAA": "*", + "TAG": "*", + "TGA": "*", + "TGT": "C", + "TGC": "C", + "TGG": "W", + "CTT": "L", + "CTC": "L", + "CTA": "L", + "CTG": "L", + "CCT": "P", + "CCC": "P", + "CCA": "P", + "CCG": "P", + "CAT": "H", + "CAC": "H", + "CAA": "Q", + "CAG": "Q", + "CGT": "R", + "CGC": "R", + "CGA": "R", + "CGG": "R", + "ATT": "I", + "ATC": "I", + "ATA": "I", + "ATG": "M", + "ACT": "T", + "ACC": "T", + "ACA": "T", + "ACG": "T", + "AAT": "N", + "AAC": "N", + "AAA": "K", + "AAG": "K", + "AGT": "S", + "AGC": "S", + "AGA": "R", + "AGG": "R", + "GTT": "V", + "GTC": "V", + "GTA": "V", + "GTG": "V", + "GCT": "A", + "GCC": "A", + "GCA": "A", + "GCG": "A", + "GAT": "D", + "GAC": "D", + "GAA": "E", + "GAG": "E", + "GGT": "G", + "GGC": "G", + "GGA": "G", + "GGG": "G", +} + + +__complementTranslation = {"A": "T", "C": "G", "G": "C", "T": "A", "N": "N"} + + def complement(s): """ Return complement of 's'. 
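As with the DNA script, this one recomputes hashes independently to verify sourmash's C++ code. A hedged sketch of the DNA hashing convention these scripts rely on, assuming sourmash's documented scheme (murmur64 with seed 42 over the lexicographically smaller of a k-mer and its reverse complement; mmh3.hash64 returns signed values, hence the mask). The protein scripts hash the translated peptide k-mers directly, without canonicalization.

import mmh3


def revcomp(s):
    # reverse complement via str.translate
    return s.translate(str.maketrans("ACGTN", "TGCAN"))[::-1]


def sourmash_style_hash(kmer):
    # hash the canonical strand, then mask to the unsigned 64-bit value sourmash stores
    canonical = min(kmer, revcomp(kmer))
    return mmh3.hash64(canonical, seed=42)[0] & 0xFFFFFFFFFFFFFFFF


print(sourmash_style_hash("ACGTACGTACGTACGTACGTA"))  # a 21-mer, matching K = 21 above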
@@ -44,7 +96,7 @@ def reverse(s): def peptides(seq, start): for i in range(start, len(seq), 3): - yield dna_to_aa.get(seq[i:i+3], "X") + yield dna_to_aa.get(seq[i : i + 3], "X") def translate(seq): @@ -52,27 +104,31 @@ def translate(seq): pep = peptides(seq, i) yield "".join(pep) - revcomp = reverse(complement((seq))) + revcomp = reverse(complement(seq)) for i in range(3): pep = peptides(revcomp, i) yield "".join(pep) + def kmers(seq, k): for start in range(len(seq) - k + 1): - yield seq[start:start + k] + yield seq[start : start + k] + ### K = 21 -import sys, screed +import sys +import screed import mmh3 import sourmash -print('imported sourmash:', sourmash, file=sys.stderr) + +print("imported sourmash:", sourmash, file=sys.stderr) import sourmash.signature record = next(iter(screed.open(sys.argv[1]))) -print('loaded', record.name, file=sys.stderr) +print("loaded", record.name, file=sys.stderr) mh = sourmash.MinHash(ksize=K, n=500, is_protein=True) prot_ksize = int(K / 3) @@ -87,5 +143,5 @@ def kmers(seq, k): mh.add_hash(hash) -s = sourmash.signature.SourmashSignature('', mh, name=record.name) +s = sourmash.signature.SourmashSignature("", mh, name=record.name) print(sourmash.signature.save_signatures([s]))
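One closing note, on utils/cardinality_estimate_confidence.py earlier in this patch: the bound it implements is P >= 1 - 2 * exp(-(eps^2) * n / (3 * s)) for true set size n, scale s, and relative error eps. A self-contained check (the function body is copied from the diff) that reproduces the first Mathematica reference value from its tests:

import numpy as np


def set_size_chernoff(set_size, scale, relative_error=0.05):
    # Chernoff-style lower bound on the probability that the scaled-MinHash
    # cardinality estimate falls within relative_error of the true set size
    return 1 - 2 * np.exp(-(relative_error**2) * set_size / (scale * 3))


# matches value_from_mathematica = 0.928652 in test_set_size_chernoff
print(round(set_size_chernoff(1_000_000, 1 / 0.1, 0.01), 6))  # 0.928652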