diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml index 7a1516f..3a2cdfc 100644 --- a/.github/workflows/test_and_deploy.yml +++ b/.github/workflows/test_and_deploy.yml @@ -28,43 +28,43 @@ jobs: #python-version: ['3.8', '3.9', '3.10'] python-version: ['3.10'] + defaults: + run: + shell: bash -el {0} steps: - - uses: actions/checkout@v3 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - # these libraries enable testing on Qt on linux - - uses: tlambert03/setup-qt-libs@v1 - - # strategy borrowed from vispy for installing opengl libs on windows - - name: Install Windows OpenGL - if: runner.os == 'Windows' - run: | - git clone --depth 1 https://github.com/pyvista/gl-ci-helpers.git - powershell gl-ci-helpers/appveyor/install_opengl.ps1 - - # note: if you need dependencies from conda, considering using - # setup-miniconda: https://github.com/conda-incubator/setup-miniconda - # and - # tox-conda: https://github.com/tox-dev/tox-conda - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install setuptools tox tox-gh-actions - - # this runs the platform-specific tests declared in tox.ini - - name: Test with tox - uses: GabrielBB/xvfb-action@v1 - with: - run: python -m tox - env: - PLATFORM: ${{ matrix.platform }} - - - name: Coverage - uses: codecov/codecov-action@v2 + - uses: actions/checkout@v3 + - name: Set up conda ${{ matrix.python-version }} + uses: conda-incubator/setup-miniconda@v2 + with: + mamba-version: "*" + activate-environment: napari-tomotwin + channel-priority: true + python-version: ${{ matrix.python-version }} + channels: conda-forge, defaults + environment-file: conda_env.yml + - run: conda --version + - run: conda init bash + - run: | + which python + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools setuptools_scm pylint tox tox-gh-actions pytest pytest-coverage twine build + pip install . + - name: Debug Info + run: | + which python + pip freeze + - name: Analysing the code with pylint + run: | + pylint -E $(git ls-files '*.py') + - name: Tests + run: | + pytest -v --cov=./ --cov-report=xml --cov-config=.coveragerc + env: + PLATFORM: ${{ matrix.platform }} + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v3 deploy: # this will run when you have tagged a commit, starting with "v*" diff --git a/conda_env.yml b/conda_env.yml new file mode 100644 index 0000000..d45d21a --- /dev/null +++ b/conda_env.yml @@ -0,0 +1,14 @@ +name: napari-tomotwin +channels: + - conda-forge + - defaults +dependencies: + - python=3.10 + - napari=0.4.18 + - pandas + - numpy + - pyqt + - matplotlib + - pip: + - tqdm + - napari-clusters-plotter>=0.7.2 \ No newline at end of file diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb new file mode 100644 index 0000000..f40019c --- /dev/null +++ b/notebooks/test.ipynb @@ -0,0 +1,43 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Kannst du das lesen?" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1b6fc38..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -numpy -pandas -napari -napari-clusters-plotter -matplotlib -tqdm \ No newline at end of file diff --git a/src/napari_tomotwin/_tests/test_dummy.py b/src/napari_tomotwin/_tests/test_dummy.py deleted file mode 100644 index 32dda30..0000000 --- a/src/napari_tomotwin/_tests/test_dummy.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_dummy(): - return True diff --git a/src/napari_tomotwin/_tests/test_make_targets.py b/src/napari_tomotwin/_tests/test_make_targets.py new file mode 100644 index 0000000..ac41cf4 --- /dev/null +++ b/src/napari_tomotwin/_tests/test_make_targets.py @@ -0,0 +1,129 @@ +import os +import tempfile +import unittest + +import numpy as np +import pandas as pd +from napari_tomotwin.make_targets import _run + +from glob import glob + +class MyTestCase(unittest.TestCase): + def test_make_targets_single_cluster_medoid(self): + fake_embedding = { + "X": [0, 1, 2], + "Y": [0, 1, 2], + "Z": [0, 1, 2], + "1": [5, 6, 7], + "2": [5, 6, 7], + "filepath": ["a.mrc","b.mrc","c.mrc"] + } + cluster = np.array([1,1,1]) + with tempfile.TemporaryDirectory() as tmpdirname: + _run(clusters=cluster, + embeddings=pd.DataFrame(fake_embedding), + average_method_name="Medoid", + output_folder=tmpdirname) + + box_data: pd.DataFrame = pd.read_csv( + os.path.join(tmpdirname,"cluster_1_medoid.coords"), + delim_whitespace=True, + index_col=False, + header=None, + dtype=float, + names=["X","Y","Z"] + ).astype(np.int32) # type: ignore + self.assertEqual(box_data.iloc[0, 0], 1) + self.assertEqual(box_data.iloc[0, 1], 1) + self.assertEqual(box_data.iloc[0, 2], 1) + + def test_make_targets_two_clusters_medoid(self): + range(6) + fake_embedding = { + "X": [0, 1, 2, 8, 9, 10], + "Y": [0, 1, 2, 8, 9, 10], + "Z": [0, 1, 2, 8, 9, 10], + "1": [5, 6, 7, 10, 11, 12], + "2": [5, 6, 7, 10, 11, 12], + } + fake_embedding['filepath'] = [f"{i}.mrc" for i in range(len(fake_embedding["X"]))] + cluster = np.array([1,1,1,2,2,2]) + with tempfile.TemporaryDirectory() as tmpdirname: + _run(clusters=cluster, + embeddings=pd.DataFrame(fake_embedding), + average_method_name="Medoid", + output_folder=tmpdirname) + + box_data: pd.DataFrame = pd.read_csv( + os.path.join(tmpdirname,"cluster_1_medoid.coords"), + delim_whitespace=True, + index_col=False, + header=None, + dtype=float, + names=["X","Y","Z"] + ).astype(np.int32) # type: ignore + self.assertEqual(box_data.iloc[0, 0], 1) + self.assertEqual(box_data.iloc[0, 1], 1) + self.assertEqual(box_data.iloc[0, 2], 1) + + box_data: pd.DataFrame = pd.read_csv( + os.path.join(tmpdirname, "cluster_2_medoid.coords"), + delim_whitespace=True, + index_col=False, + header=None, + dtype=float, + names=["X", "Y", "Z"] + ).astype(np.int32) # type: ignore + self.assertEqual(box_data.iloc[0, 0], 9) + self.assertEqual(box_data.iloc[0, 1], 9) + self.assertEqual(box_data.iloc[0, 2], 9) + + def test_make_targets_single_cluster_average(self): + fake_embedding = { + "X": [0, 1, 2], + "Y": [0, 1, 2], + "Z": [0, 1, 2], + "1": [5, 6, 7], + "2": [5, 6, 7], + "filepath": ["a.mrc","b.mrc","c.mrc"] + } + cluster = np.array([1,1,1]) + with tempfile.TemporaryDirectory() as tmpdirname: + _run(clusters=cluster, + embeddings=pd.DataFrame(fake_embedding), + average_method_name="Average", + output_folder=tmpdirname) + + targets_emb: pd.DataFrame = pd.read_pickle( + os.path.join(tmpdirname,"cluster_targets.temb"), + ) + self.assertEqual(targets_emb["1"].iloc[0],6) + self.assertEqual(targets_emb["2"].iloc[0],6) + + def test_make_targets_single_cluster_no_coords_written(self): + fake_embedding = { + "X": [0, 1, 2], + "Y": [0, 1, 2], + "Z": [0, 1, 2], + "1": [5, 6, 7], + "2": [5, 6, 7], + "filepath": ["a.mrc","b.mrc","c.mrc"] + } + cluster = np.array([1,1,1]) + with tempfile.TemporaryDirectory() as tmpdirname: + _run(clusters=cluster, + embeddings=pd.DataFrame(fake_embedding), + average_method_name="Average", + output_folder=tmpdirname) + + r = glob(os.path.join(tmpdirname,"*.coords")) + print(r) + self.assertEqual(len(r),0) + + + + + + +if __name__ == '__main__': + unittest.main() diff --git a/src/napari_tomotwin/make_targets.py b/src/napari_tomotwin/make_targets.py index 09590eb..560f96b 100644 --- a/src/napari_tomotwin/make_targets.py +++ b/src/napari_tomotwin/make_targets.py @@ -1,19 +1,47 @@ -from magicgui import magic_factory +import os import pathlib -import pandas as pd +from typing import List, Tuple, Literal, Callable + import numpy as np -import os -from typing import List, Tuple +import numpy.typing as npt +import pandas as pd +from magicgui import magic_factory +from scipy.spatial.distance import cdist -def _make_targets(embeddings: pd.DataFrame, clusters: pd.DataFrame) -> Tuple[pd.DataFrame, List[pd.DataFrame]]: +def _get_medoid_embedding(embeddings: pd.DataFrame, max_embeddings: int = 50000) -> Tuple[pd.DataFrame, npt.ArrayLike]: + """ + Calculates the medoid based of subset of the embeddings. + """ + if len(embeddings)>max_embeddings: + # For samples more than 50k it's way to slow and memory hungry. + embeddings = embeddings.sample(max_embeddings) + print(f"Your cluster size ({len(embeddings)}) is bigger then {max_embeddings}. Make a random sample to calculate medoid.") + only_emb = embeddings.drop(columns=["X", "Y", "Z", "filepath"], errors="ignore").astype(np.float32) + distance_matrix=cdist(only_emb,only_emb,metric='cosine') # its not the cosine similarity, rather a distance (its 0 in case of same embeddings) + medoid_index = np.argmin(np.sum(distance_matrix,axis=0)) + medoid = only_emb.iloc[medoid_index,:] + return medoid, embeddings.iloc[[medoid_index]][['X','Y','Z']] + +def _get_avg_embedding(embeddings: pd.DataFrame) -> Tuple[pd.DataFrame, npt.ArrayLike]: + only_emb = embeddings.drop(columns=["X", "Y", "Z", "filepath"], errors="ignore").astype(np.float32) + target = only_emb.mean(axis=0) + return target, np.array([]) + + +def _make_targets(embeddings: pd.DataFrame, clusters: pd.DataFrame, avg_func: Callable[[pd.DataFrame], npt.ArrayLike]) -> Tuple[pd.DataFrame, List[pd.DataFrame], dict]: targets = [] sub_embeddings = [] target_names = [] + target_locations = { + + } for cluster in set(clusters): if cluster == 0: continue - target = embeddings.drop(columns=["X", "Y", "Z", "filepath"], errors="ignore").loc[clusters == cluster, :].astype(np.float32).mean(axis=0) + cluster_embeddings = embeddings.loc[clusters == cluster, :] + target, position = avg_func(cluster_embeddings) + target_locations[cluster] = position sub_embeddings.append(embeddings.loc[clusters == cluster, :]) target = target.to_frame().T targets.append(target) @@ -21,13 +49,51 @@ def _make_targets(embeddings: pd.DataFrame, clusters: pd.DataFrame) -> Tuple[pd. targets = pd.concat(targets, ignore_index=True) targets["filepath"] = target_names - return targets, sub_embeddings + return targets, sub_embeddings, target_locations + + +def _run(clusters, + embeddings: pd.DataFrame, + output_folder: pathlib.Path, + average_method_name: Literal["Average", "Medoid"] = "Medoid", +): + assert len(embeddings) == len(clusters), "Cluster and embedding file are not compatible." + + avg_method = _get_medoid_embedding + if average_method_name == "Average": + avg_method = _get_avg_embedding + + print("Make targets") + embeddings = embeddings.reset_index() + + targets, sub_embeddings, target_locations = _make_targets(embeddings, clusters, avg_func=avg_method) + + print("Write targets") + os.makedirs(output_folder, exist_ok="True") + pth_ref = os.path.join(output_folder, "cluster_targets.temb") + + targets.to_pickle(pth_ref) + print(target_locations) + for cluster_id in target_locations: + df_loc = target_locations[cluster_id] + print(df_loc) + if df_loc is not None and len(df_loc) > 0: + pth_loc = os.path.join(output_folder, f"cluster_{cluster_id}_medoid.coords") + df_loc[["X", "Y", "Z"]].to_csv(pth_loc, sep=" ", header=None, index=None) + + print("Write custer embeddings") + for emb_i, emb in enumerate(sub_embeddings): + pth_emb = os.path.join(output_folder, f"embeddings_cluster_{emb_i}.temb") + emb.to_pickle(pth_emb) + + print("Done") @magic_factory( call_button="Save", label_layer={'label': 'TomoTwin Label Mask:'}, embeddings_filepath={'label': 'Path to embeddings file:', 'filter': '*.temb'}, + average_method_name={'label': "Average method"}, output_folder={ 'label': "Output folder", 'mode': 'd' @@ -36,30 +102,14 @@ def _make_targets(embeddings: pd.DataFrame, clusters: pd.DataFrame) -> Tuple[pd. def make_targets( label_layer: "napari.layers.Labels", embeddings_filepath: pathlib.Path, - output_folder: pathlib.Path + output_folder: pathlib.Path, + average_method_name: Literal["Average", "Medoid"] = "Medoid", ): - print("Read embeddings") - embeddings = pd.read_pickle(embeddings_filepath) print("Read clusters") clusters = label_layer.features['MANUAL_CLUSTER_ID'] - assert len(embeddings) == len(clusters), "Cluster and embedding file are not compatible." - - print("Make targets") - embeddings = embeddings.reset_index() - - targets, sub_embeddings = _make_targets(embeddings, clusters) - - print("Write targets") - os.makedirs(output_folder, exist_ok="True") - pth_ref = os.path.join(output_folder, "cluster_targets.temb") - - targets.to_pickle(pth_ref) - - print("Write custer embeddings") - for emb_i, emb in enumerate(sub_embeddings): - pth_emb = os.path.join(output_folder, f"embeddings_cluster_{emb_i}.temb") - emb.to_pickle(pth_emb) + print("Read embeddings") + embeddings = pd.read_pickle(embeddings_filepath) - print("Done") \ No newline at end of file + _run(clusters, embeddings, output_folder, average_method_name)