KwanLab · evanroyrees · May 6, 2022 · May 6, 2022 · May 6, 2022 · May 6, 2022
diff --git a/.github/workflows/pytest_codecov.yml b/.github/workflows/pytest_codecov.yml
@@ -33,18 +33,47 @@ jobs:
     name: pytest & codecov
     steps:
       - uses: actions/checkout@v2
-      - name: Setup conda
-        uses: s-weigand/setup-conda@v1
+      - name: Cache conda
+        uses: actions/cache@v2
+        with:
+          path: ~/conda_pkgs_dir
+          key: ${{ runner.os }}-conda-py${{ matrix.python-version }}-${{ hashFiles('tests/environment.yml') }}
+      - name: Cache test data
+        id: cache-test-data
+        uses: actions/cache@v2
+        with:
+          path: tests/data/test_data.json
+          key: ${{ runner.os }}-test-data
+      - name: Setup mamba
+        uses: conda-incubator/setup-miniconda@v2
         with:
-          update-conda: true
           python-version: ${{ matrix.python-version }}
-          conda-channels: anaconda, conda-forge, bioconda
-      - run: conda --version
-      - run: |
-          conda env update -n $CONDA_DEFAULT_ENV --file=tests/environment.yml
+          mamba-version: "*"
+          channels: conda-forge,bioconda,defaults
+          channel-priority: true
+          activate-environment: autometa
+          environment-file: tests/environment.yml
+          # Required by setup-miniconda for the conda package cache above to work properly
+          use-only-tar-bz2: true
+      - name: Conda config info
+        shell: bash -l {0}
+        run: |
+          conda info
           conda list
-          gdown https://drive.google.com/uc\?\id=1bSlPldaq3C6Cf9Y5Rm7iwtUDcjxAaeEk -O tests/data/test_data.json
-          python -m pytest --cov-report=xml --cov=autometa tests/
+          conda config --show-sources
+          conda config --show
+          printenv | sort
+      - name: Download test data
+        # Skip the download when the "Cache test data" step already restored the file
+        if: steps.cache-test-data.outputs.cache-hit != 'true'
+        shell: bash -l {0}
+        run: gdown https://drive.google.com/uc\?\id=1bSlPldaq3C6Cf9Y5Rm7iwtUDcjxAaeEk -O tests/data/test_data.json
+      - name: Install Autometa
+        shell: bash -l {0}
+        run: python -m pip install . --ignore-installed --no-deps -vv
+      - name: Run pytest
+        shell: bash -l {0}
+        run: python -m pytest --cov-report=xml --cov=autometa tests/
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v2
         with:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/psf/black
-    rev: 21.7b0 # This should correspond to the black version listed in tests/environment.yml
+    rev: 22.3.0 # This should correspond to the black version listed in tests/environment.yml
     hooks:
       - id: black
   - repo: https://github.com/pre-commit/pre-commit-hooks

diff --git a/Dockerfile b/Dockerfile
@@ -46,6 +46,7 @@ RUN echo "Testing autometa import" \
 
 # Check entrypoints are available
 RUN echo "Checking autometa entrypoints" \
+    && autometa --version > /dev/null \
     && autometa-config -h > /dev/null \
     && autometa-update-databases -h > /dev/null \
     && autometa-length-filter -h > /dev/null \

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -2,3 +2,7 @@ include LICENSE
 include MANIFEST.in
 include README.md
 include setup.py
+recursive-include autometa *
+prune autometa/__pycache__
+# prune only removes the top-level cache dir; also drop bytecode from every subpackage
+global-exclude *.py[cod]
diff --git a/Makefile b/Makefile
@@ -31,7 +31,7 @@ clean:
 
 ## Apply black formatting
 black:
-	black --exclude autometa/validation autometa
+	black --exclude autometa/validation autometa bin tests autometa/validation/benchmark.py autometa/validation/datasets.py
 
 ## Set up python interpreter environment
 create_environment: autometa-env.yml

diff --git a/autometa/__init__.py b/autometa/__init__.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+
+import pkg_resources
+
+dist = pkg_resources.get_distribution("autometa")  # metadata of the autometa distribution
+
+__version__ = dist.version  # printed by `autometa --version`
+console_scripts = dist.get_entry_map()["console_scripts"].keys()  # CLI entry point names
diff --git a/autometa/__main__.py b/autometa/__main__.py
@@ -0,0 +1,76 @@
+import sys
+from autometa import __version__, console_scripts
+
+import argparse
+
+citation = """
+APA:
+
+Miller, I. J., Rees, E. R., Ross, J., Miller, I., Baxa, J., Lopera, J., Kerby, R. L.,
+Rey, F. E., & Kwan, J. C. (2019). Autometa: automated extraction of microbial genomes
+from individual shotgun metagenomes. Nucleic Acids Research, 47(10).
+https://doi.org/10.1093/nar/gkz148
+
+BibTeX:
+
+@article{
+    Miller_Autometa_automated_extraction_2019,
+    author = {Miller, Ian J. and Rees, Evan R. and Ross, Jennifer and Miller, Izaak and Baxa, Jared and Lopera, Juan and Kerby, Robert L. and Rey, Federico E. and Kwan, Jason C.},
+    doi = {10.1093/nar/gkz148},
+    journal = {Nucleic Acids Research},
+    number = {10},
+    title = {{Autometa: automated extraction of microbial genomes from individual shotgun metagenomes}},
+    url = {https://github.com/KwanLab/Autometa},
+    volume = {47},
+    year = {2019}
+}
+"""
+
+
+def main():
+    """Entry point of the ``autometa`` command.
+
+    ``-V/--version`` prints the package version and ``-C/--citation`` prints the
+    citation text; either flag exits with status 0. Without flags, the available
+    console scripts are listed as a tree, followed by docs and code links.
+    """
+    parser = argparse.ArgumentParser(
+        description="Describe Autometa citation & version. "
+        "No arguments will list the available autometa commands, docs and code information"
+    )
+    parser.add_argument(
+        "-V", "--version", help="Print autometa version", action="store_true"
+    )
+    parser.add_argument(
+        "-C",
+        "--citation",
+        help="Print autometa citation (APA and BibTex)",
+        action="store_true",
+    )
+    args = parser.parse_args()
+
+    if args.version:
+        print(f"autometa: {__version__}")
+        sys.exit(0)
+
+    if args.citation:
+        print(citation)
+        sys.exit(0)
+
+    print("Autometa Commands")
+    commands = list(console_scripts)
+    # Every command but the last gets a "├──>" branch; the last closes the tree
+    # with "└──>". Slicing keeps this safe for one (or zero) registered commands.
+    branches = [f"\t├──> {command}" for command in commands[:-1]]
+    branches.extend(f"\t└──> {command}" for command in commands[-1:])
+    if branches:
+        print("\n".join(branches))
+    print(
+        "\nRun 'autometa --version' or 'autometa --citation' for respective info"
+        "\nDocs: https://autometa.readthedocs.io"
+        "\nCode: https://github.com/KwanLab/Autometa"
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/autometa/common/kmers.py b/autometa/common/kmers.py
@@ -470,7 +470,7 @@ def embed(
         embed_dimensions` to embed k-mer frequencies (the default is 2).
 
         The output embedded kmers will follow columns of `x_1` to `x_{embed_dimensions}`
-        
+
         NOTE: The columns are 1-indexed, i.e. at x_1 *not* x_0
 
     pca_dimensions : int, optional
@@ -486,22 +486,22 @@ def embed(
 
     seed: int, optional
         Seed to use for `method`. Allows for reproducibility from random state.
-    
+
     n_jobs: int, optional
-        
+
         Used with `sksne`, `densmap` and `umap`, (the default is -1 which will attempt to use all available CPUs)
-        
+
         Note
         ----
 
         For n_jobs below -1, (CPUS + 1 + n_jobs) are used. For example with n_jobs=-2, all CPUs but one are used.
 
         * scikit-learn TSNE `n_jobs glossary <https://scikit-learn.org/stable/glossary.html#term-n_jobs>`_
-        * UMAP and DensMAP's 
-        `invocation <https://github.com/lmcinnes/umap/blob/2c5232f7b946efab30e279c0b095b37f5648ed8b/umap/umap_.py#L328-L341>`_ 
-        use this with 
+        * UMAP and DensMAP's
+        `invocation <https://github.com/lmcinnes/umap/blob/2c5232f7b946efab30e279c0b095b37f5648ed8b/umap/umap_.py#L328-L341>`_
+        use this with
         `pynndescent <https://github.com/lmcinnes/pynndescent/blob/cc6ed32e25f7afb14913bff04d3b01723b33e5b5/pynndescent/pynndescent_.py#L629-L632>`_
-        
+
 
     **method_kwargs : Dict[str, Any], optional
 
@@ -524,7 +524,7 @@ def embed(
 
         NOTE: Setting duplicate arguments will result in an error
 
-        Here we specify ``UMAP(densmap=True)`` using ``method='densmap'`` 
+        Here we specify ``UMAP(densmap=True)`` using ``method='densmap'``
         and also attempt to overwrite to ``UMAP(densmap=False)``
         with the method_kwargs, ``**{'densmap':False}``, resulting
         in a TypeError.
@@ -681,17 +681,24 @@ def do_trimap():
         # When method_kwargs = **{'output_dens': True}
         # X : tuple[np.ndarray, np.ndarray, np.ndarray]
         # X : tuple[embedding, original local radii, embedding local radii]
-        output_dens_ndarray_cols = [embed_cols, ["original_local_radius"], ["embedded_local_radius"]]
-        embedded_df = pd.concat([
+        output_dens_ndarray_cols = [
+            embed_cols,
+            ["original_local_radius"],
+            ["embedded_local_radius"],
+        ]
+        embedded_df = pd.concat(
+            [
                 pd.DataFrame(result, index=df.index, columns=cols)
-                for result,cols in zip(X, output_dens_ndarray_cols)
+                for result, cols in zip(X, output_dens_ndarray_cols)
             ],
-            axis=1
+            axis=1,
         )
     elif isinstance(X, np.ndarray):
         embedded_df = pd.DataFrame(X, index=df.index, columns=embed_cols)
     else:
-        logger.warning(f"Unrecognized {method} transform (method_kwargs={method_kwargs}) output type: {type(X)}")
+        logger.warning(
+            f"Unrecognized {method} transform (method_kwargs={method_kwargs}) output type: {type(X)}"
+        )
         embedded_df = pd.DataFrame(X, index=df.index, columns=embed_cols)
     if out:
         embedded_df.to_csv(out, sep="\t", index=True, header=True)

diff --git a/autometa/common/utilities.py b/autometa/common/utilities.py
@@ -440,8 +440,9 @@ def internet_is_connected(
     except socket.error:
         return False
 
+
 def ncbi_is_connected(
-    filepath: str = "rsync://ftp.ncbi.nlm.nih.gov/genbank/GB_Release_Number"
+    filepath: str = "rsync://ftp.ncbi.nlm.nih.gov/genbank/GB_Release_Number",
 ) -> bool:
     """Check if ncbi databases are reachable. This can be used instead of a check for internet connection.
 
@@ -462,6 +463,7 @@ def ncbi_is_connected(
     proc = subprocess.run(cmd)
     return proc.returncode == 0
 
+
 if __name__ == "__main__":
     print(
         "This file contains utilities for Autometa pipeline and should not be run directly!"

diff --git a/autometa/taxonomy/lca.py b/autometa/taxonomy/lca.py
@@ -237,10 +237,10 @@ def preprocess_minimums(self):
         ):
             for row in range(0, nrows):
                 # First we check that the exponent of the column does not exceed the rows
-                if 2 ** col > nrows:
+                if 2**col > nrows:
                     continue
                 # Next check whether element at pos is within rows
-                if row + (2 ** col) - 1 >= nrows:
+                if row + (2**col) - 1 >= nrows:
                     sparse_array[row, col] = False
                     continue
                 # We now have our range in terms of indices
@@ -325,7 +325,7 @@ def lca(self, node1, node2):
         # equipartition range b/w both nodes.
         cutoff_range = int(np.floor(np.log2(high - low + 1)))
         lower_index = self.sparse[low, cutoff_range]
-        upper_index = self.sparse[(high - (2 ** cutoff_range) + 1), cutoff_range]
+        upper_index = self.sparse[(high - (2**cutoff_range) + 1), cutoff_range]
         lower_index, upper_index = map(int, [lower_index, upper_index])
         lower_range = self.level[lower_index]
         upper_range = self.level[upper_index]

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
@@ -72,7 +72,7 @@ def check_samplesheet(file_in, file_out):
         "cov_from_assembly",
     ]
     num_required_cols = len(req_header_cols)
-    with open(file_in, "r", encoding='utf-8-sig') as fh:
+    with open(file_in, "r", encoding="utf-8-sig") as fh:
         ## Check header
         header = fh.readline().strip()
         header_cols = [header_col.strip('"') for header_col in header.split(",")]

diff --git a/setup.py b/setup.py
@@ -3,8 +3,7 @@
 
 import os
 
-from setuptools import setup
-from setuptools import find_packages
+from setuptools import find_packages, setup
 
 
 def read(fname):
@@ -42,6 +41,7 @@ def read(fname):
             "autometa-unclustered-recruitment = autometa.binning.unclustered_recruitment:main",
             "autometa-download-dataset = autometa.validation.datasets:main",
             "autometa-benchmark = autometa.validation.benchmark:main",
+            "autometa = autometa.__main__:main",
         ]
     },
     author="Jason C. Kwan",

diff --git a/tests/environment.yml b/tests/environment.yml
@@ -7,7 +7,7 @@ dependencies:
   - attrs # test-data requirement
   - bedtools
   - biopython
-  - black==21.7b0
+  - black==22.3.0
   - bowtie2
   - diamond>=2.0
   - gdown

diff --git a/tests/unit_tests/test_kmers.py b/tests/unit_tests/test_kmers.py
@@ -155,7 +155,7 @@ def test_embed_methods(norm_df, method, tmp_path):
     method_kwargs = {}
     verbose = 1 if method == "sksne" else True
     method_kwargs.update({"verbose": verbose})
-    output_dens = {'output_dens':True} if method == "densmap" else {}
+    output_dens = {"output_dens": True} if method == "densmap" else {}
     method_kwargs.update(output_dens)
     df = kmers.embed(
         kmers=norm_df,