Skip to content

Commit

Permalink
Add progress bars to neighborhood analysis metric computation (#758)
Browse files (browse the repository at this point in the history)
  • Loading branch information
alex-l-kong authored Oct 11, 2022
1 parent 0d89228 commit ddfcbbd
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
2 changes: 1 addition & 1 deletion ark/spLDA/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def compute_topic_eda(features, featurization, topics, silhouette=False, num_boo
stat_names = ['inertia', 'silhouette', 'gap_stat', 'gap_sds', 'percent_var_exp', "cell_counts"]
stats = dict(zip(stat_names, [{} for name in stat_names]))

# iterative over topic number candidates
# iterate over topic number candidates
pb_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]'
for k in tqdm(topics, bar_format=pb_format):
# cluster with KMeans
Expand Down
17 changes: 11 additions & 6 deletions ark/utils/spatial_analysis_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from scipy.spatial.distance import cdist
from sklearn.cluster import KMeans
from statsmodels.stats.multitest import multipletests
from tqdm.notebook import tqdm

import ark.settings as settings
from ark.utils import io_utils, misc_utils
Expand Down Expand Up @@ -481,8 +482,8 @@ def compute_kmeans_inertia(neighbor_mat_data, min_k=2, max_k=10):
Returns:
xarray.DataArray:
contains a single dimension, cluster_num, which indicates the inertia
when cluster_num was set as k for k-means clustering
contains a single dimension, `cluster_num`, which indicates the inertia
when `cluster_num` was set as k for k-means clustering
"""

# create array we can store the results of each k for clustering
Expand All @@ -491,7 +492,9 @@ def compute_kmeans_inertia(neighbor_mat_data, min_k=2, max_k=10):
stats_raw_data = np.zeros(max_k - 1)
cluster_stats = xr.DataArray(stats_raw_data, coords=coords, dims=dims)

for n in range(min_k, max_k + 1):
# iterate over each k value
pb_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]'
for n in tqdm(range(min_k, max_k + 1), bar_format=pb_format):
cluster_fit = KMeans(n_clusters=n).fit(neighbor_mat_data)
cluster_stats.loc[n] = cluster_fit.inertia_

Expand All @@ -516,8 +519,8 @@ def compute_kmeans_silhouette(neighbor_mat_data, min_k=2, max_k=10, subsample=No
Returns:
xarray.DataArray:
contains a single dimension, cluster_num, which indicates the Silhouette score
when cluster_num was set as k for k-means clustering
contains a single dimension, `cluster_num`, which indicates the Silhouette score
when `cluster_num` was set as k for k-means clustering
"""

# create array we can store the results of each k for clustering
Expand All @@ -526,7 +529,9 @@ def compute_kmeans_silhouette(neighbor_mat_data, min_k=2, max_k=10, subsample=No
stats_raw_data = np.zeros(max_k - 1)
cluster_stats = xr.DataArray(stats_raw_data, coords=coords, dims=dims)

for n in range(min_k, max_k + 1):
# iterate over each k value
pb_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]'
for n in tqdm(range(min_k, max_k + 1), bar_format=pb_format):
cluster_fit = KMeans(n_clusters=n).fit(neighbor_mat_data)
cluster_labels = cluster_fit.labels_

Expand Down

0 comments on commit ddfcbbd

Please sign in to comment.