Skip to content

Commit

Permalink
remove distance function filters
Browse files Browse the repository at this point in the history
  • Loading branch information
samuel-amidon committed Dec 18, 2024
1 parent a1e40e7 commit 16c32fb
Showing 1 changed file with 94 additions and 117 deletions.
211 changes: 94 additions & 117 deletions bicytok/distanceMetricFuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,33 +42,29 @@ def KL_EMD_1D(
) # Check that not all cells are target cells

for rec in range(recAbundances.shape[1]):
if ( # Check these with pre-normalization values
np.mean(recAbundances[:, rec]) > 5
and np.mean(recAbundances[targ, rec]) > np.mean(recAbundances[offTarg, rec])
):
targAbun = targNorms[:, rec]
offTargAbun = offTargNorms[:, rec]

assert all(
targAbun == recAbundances[targ, rec] / np.mean(recAbundances[:, rec])
)

targKDE = KernelDensity(kernel="gaussian").fit(targAbun.reshape(-1, 1))
offTargKDE = KernelDensity(kernel="gaussian").fit(
offTargAbun.reshape(-1, 1)
)
minAbun = np.minimum(targAbun.min(), offTargAbun.min()) - 10
maxAbun = np.maximum(targAbun.max(), offTargAbun.max()) + 10
outcomes = np.arange(minAbun, maxAbun + 1).reshape(-1, 1)
targDist = np.exp(targKDE.score_samples(outcomes))
offTargDist = np.exp(offTargKDE.score_samples(outcomes))
KL_div_vals[rec] = stats.entropy(
offTargDist.flatten() + 1e-200, targDist.flatten() + 1e-200, base=2
)

EMD_vals[rec] = stats.wasserstein_distance(
targAbun, offTargAbun
) # Consider switching/comparing to ot.emd2_1d
targAbun = targNorms[:, rec]
offTargAbun = offTargNorms[:, rec]

assert all(
targAbun == recAbundances[targ, rec] / np.mean(recAbundances[:, rec])
)

targKDE = KernelDensity(kernel="gaussian").fit(targAbun.reshape(-1, 1))
offTargKDE = KernelDensity(kernel="gaussian").fit(
offTargAbun.reshape(-1, 1)
)
minAbun = np.minimum(targAbun.min(), offTargAbun.min()) - 10
maxAbun = np.maximum(targAbun.max(), offTargAbun.max()) + 10
outcomes = np.arange(minAbun, maxAbun + 1).reshape(-1, 1)
targDist = np.exp(targKDE.score_samples(outcomes))
offTargDist = np.exp(offTargKDE.score_samples(outcomes))
KL_div_vals[rec] = stats.entropy(
offTargDist.flatten() + 1e-200, targDist.flatten() + 1e-200, base=2
)

EMD_vals[rec] = stats.wasserstein_distance(
targAbun, offTargAbun
) # Consider switching/comparing to ot.emd2_1d

return KL_div_vals, EMD_vals

Expand Down Expand Up @@ -108,52 +104,44 @@ def KL_EMD_2D(
recAbundances.shape[1]
) # Triangle indices, includes diagonal (k=0 by default)
for rec1, rec2 in zip(row, col, strict=False):
if (
np.mean(recAbundances[:, rec1]) > 5
and np.mean(recAbundances[:, rec2]) > 5
and np.mean(recAbundances[targ, rec1])
> np.mean(recAbundances[offTarg, rec1])
and np.mean(recAbundances[targ, rec2])
> np.mean(recAbundances[offTarg, rec2])
):
targAbun1, targAbun2 = targNorms[:, rec1], targNorms[:, rec2]
offTargAbun1, offTargAbun2 = offTargNorms[:, rec1], offTargNorms[:, rec2]

assert all(
targAbun1 == recAbundances[targ, rec1] / np.mean(recAbundances[:, rec1])
)

targAbunAll = np.vstack((targAbun1, targAbun2)).transpose()
offTargAbunAll = np.vstack((offTargAbun1, offTargAbun2)).transpose()

assert targAbunAll.shape == (np.sum(targ), 2)

targKDE = KernelDensity(kernel="gaussian").fit(targAbunAll.reshape(-1, 2))
offTargKDE = KernelDensity(kernel="gaussian").fit(
offTargAbunAll.reshape(-1, 2)
)
minAbun = np.minimum(targAbunAll.min(), offTargAbunAll.min()) - 10
maxAbun = np.maximum(targAbunAll.max(), offTargAbunAll.max()) + 10
X, Y = np.mgrid[
minAbun : maxAbun : ((maxAbun - minAbun) / 100),
minAbun : maxAbun : ((maxAbun - minAbun) / 100),
]
outcomes = np.concatenate((X.reshape(-1, 1), Y.reshape(-1, 1)), axis=1)
targDist = np.exp(targKDE.score_samples(outcomes))
offTargDist = np.exp(offTargKDE.score_samples(outcomes))
KL_div_vals[rec1, rec2] = stats.entropy(
offTargDist.flatten() + 1e-200, targDist.flatten() + 1e-200, base=2
)

M = ot.dist(targAbunAll, offTargAbunAll)
a = np.ones((targAbunAll.shape[0],)) / targAbunAll.shape[0]
b = np.ones((offTargAbunAll.shape[0],)) / offTargAbunAll.shape[0]
EMD_vals[rec1, rec2] = ot.emd2(
a,
b,
M,
numItermax=100, # Check numIterMax
)
targAbun1, targAbun2 = targNorms[:, rec1], targNorms[:, rec2]
offTargAbun1, offTargAbun2 = offTargNorms[:, rec1], offTargNorms[:, rec2]

assert all(
targAbun1 == recAbundances[targ, rec1] / np.mean(recAbundances[:, rec1])
)

targAbunAll = np.vstack((targAbun1, targAbun2)).transpose()
offTargAbunAll = np.vstack((offTargAbun1, offTargAbun2)).transpose()

assert targAbunAll.shape == (np.sum(targ), 2)

targKDE = KernelDensity(kernel="gaussian").fit(targAbunAll.reshape(-1, 2))
offTargKDE = KernelDensity(kernel="gaussian").fit(
offTargAbunAll.reshape(-1, 2)
)
minAbun = np.minimum(targAbunAll.min(), offTargAbunAll.min()) - 10
maxAbun = np.maximum(targAbunAll.max(), offTargAbunAll.max()) + 10
X, Y = np.mgrid[
minAbun : maxAbun : ((maxAbun - minAbun) / 100),
minAbun : maxAbun : ((maxAbun - minAbun) / 100),
]
outcomes = np.concatenate((X.reshape(-1, 1), Y.reshape(-1, 1)), axis=1)
targDist = np.exp(targKDE.score_samples(outcomes))
offTargDist = np.exp(offTargKDE.score_samples(outcomes))
KL_div_vals[rec1, rec2] = stats.entropy(
offTargDist.flatten() + 1e-200, targDist.flatten() + 1e-200, base=2
)

M = ot.dist(targAbunAll, offTargAbunAll)
a = np.ones((targAbunAll.shape[0],)) / targAbunAll.shape[0]
b = np.ones((offTargAbunAll.shape[0],)) / offTargAbunAll.shape[0]
EMD_vals[rec1, rec2] = ot.emd2(
a,
b,
M,
numItermax=100, # Check numIterMax
)

return KL_div_vals, EMD_vals

Expand Down Expand Up @@ -187,49 +175,38 @@ def KL_EMD_3D(
for rec1, rec2, rec3 in combinations_with_replacement(
range(recAbundances.shape[1]), 3
): # 3D triangle (pyramidal?) indices, with replacement includes diagonals
if (
np.mean(recAbundances[:, rec1]) > 5
and np.mean(recAbundances[:, rec2]) > 5
and np.mean(recAbundances[:, rec3]) > 5
and np.mean(recAbundances[targ, rec1])
> np.mean(recAbundances[offTarg, rec1])
and np.mean(recAbundances[targ, rec2])
> np.mean(recAbundances[offTarg, rec2])
and np.mean(recAbundances[targ, rec3])
> np.mean(recAbundances[offTarg, rec3])
):
targAbun1, targAbun2, targAbun3 = (
targNorms[:, rec1],
targNorms[:, rec2],
targNorms[:, rec3],
)
offTargAbun1, offTargAbun2, offTargAbun3 = (
offTargNorms[:, rec1],
offTargNorms[:, rec2],
offTargNorms[:, rec3],
)

assert all(
targAbun1 == recAbundances[targ, rec1] / np.mean(recAbundances[:, rec1])
)

targAbunAll = np.vstack((targAbun1, targAbun2, targAbun3)).transpose()
offTargAbunAll = np.vstack(
(offTargAbun1, offTargAbun2, offTargAbun3)
).transpose() # Check that this is the same as original concat

assert targAbunAll.shape == (np.sum(targ), 3)

KL_div_vals[rec1, rec2, rec3] = 0 # Placeholder unimplemented

M = ot.dist(targAbunAll, offTargAbunAll)
a = np.ones((targAbunAll.shape[0],)) / targAbunAll.shape[0]
b = np.ones((offTargAbunAll.shape[0],)) / offTargAbunAll.shape[0]
EMD_vals[rec1, rec2, rec3] = ot.emd2(
a,
b,
M,
numItermax=100, # Check numIterMax
)
targAbun1, targAbun2, targAbun3 = (
targNorms[:, rec1],
targNorms[:, rec2],
targNorms[:, rec3],
)
offTargAbun1, offTargAbun2, offTargAbun3 = (
offTargNorms[:, rec1],
offTargNorms[:, rec2],
offTargNorms[:, rec3],
)

assert all(
targAbun1 == recAbundances[targ, rec1] / np.mean(recAbundances[:, rec1])
)

targAbunAll = np.vstack((targAbun1, targAbun2, targAbun3)).transpose()
offTargAbunAll = np.vstack(
(offTargAbun1, offTargAbun2, offTargAbun3)
).transpose() # Check that this is the same as original concat

assert targAbunAll.shape == (np.sum(targ), 3)

KL_div_vals[rec1, rec2, rec3] = 0 # Placeholder unimplemented

M = ot.dist(targAbunAll, offTargAbunAll)
a = np.ones((targAbunAll.shape[0],)) / targAbunAll.shape[0]
b = np.ones((offTargAbunAll.shape[0],)) / offTargAbunAll.shape[0]
EMD_vals[rec1, rec2, rec3] = ot.emd2(
a,
b,
M,
numItermax=100, # Check numIterMax
)

return KL_div_vals, EMD_vals

0 comments on commit 16c32fb

Please sign in to comment.