Skip to content

Commit

Permalink
3 cmp pac map (#92)
Browse files Browse the repository at this point in the history
* Investigating 3 cmps

* Plotting 3 cmps at a time

* Add topfun for cmp 15 and clean up function

* Fix plotting order

* Fix threshold

* Add labeling to FigA10

* Fix labeling

---------

Co-authored-by: Andrew Ramirez <aramirez@aretha.seas.ucla.edu>
  • Loading branch information
andrewram4287 and Andrew Ramirez authored Sep 23, 2024
1 parent 3372508 commit 658d89b
Show file tree
Hide file tree
Showing 7 changed files with 18,693 additions and 47 deletions.
18,470 changes: 18,470 additions & 0 deletions pf2/data/topp_fun_cmp15.csv

Large diffs are not rendered by default.

47 changes: 47 additions & 0 deletions pf2/figures/commonFuncs/plotGeneral.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,53 @@ def add_obs_label(X: anndata.AnnData, cmp1: str, cmp2: str):
return X




def plot_avegene_cmps(
    X: anndata.AnnData,
    gene: str,
    ax: Axes,
    order=None
):
    """Draw a boxplot of one gene's averaged expression per label and cell type.

    Expression for `gene` is offset by the values stored in ``var["means"]``,
    annotated with observation metadata, averaged within every
    (Label, Gene, Condition, Cell Type) group and drawn on `ax`.

    Parameters
    ----------
    X : AnnData whose ``obs`` holds "sample_id", "binary_outcome",
        "patient_category", "combined_cell_type" and "Label".
    gene : variable name to plot.
    ax : matplotlib axes receiving the boxplot.
    order : optional x-axis category order, forwarded to ``sns.boxplot``.

    Returns
    -------
    The grouped data frame of averages that was plotted.
    """
    outcome_col = "binary_outcome"
    category_col = "patient_category"

    gene_view = X[:, gene]
    expr = gene_view.to_df()
    expr = expr.subtract(gene_view.var["means"].values)

    # Attach the observation metadata needed for grouping and plotting.
    expr[outcome_col] = gene_view.obs[outcome_col].values
    expr[category_col] = gene_view.obs[category_col].values
    expr["Condition"] = gene_view.obs["sample_id"].values
    expr["Cell Type"] = gene_view.obs["combined_cell_type"].values
    expr["Label"] = gene_view.obs["Label"].values

    # Cells without a label are excluded before the outcome columns are combined.
    expr = expr.dropna(subset="Label")
    expr = bal_combine_bo_covid(expr, outcome_col, category_col)

    long_form = pd.melt(
        expr,
        id_vars=["Label", "Condition", "Cell Type"],
        value_vars=gene,
    )
    long_form = long_form.rename(columns={"variable": "Gene", "value": "Value"})

    grouped = long_form.groupby(
        ["Label", "Gene", "Condition", "Cell Type"], observed=False
    )
    df = grouped.mean()
    df = df.rename(columns={"Value": "Average Gene Expression"})
    df = df.reset_index()

    is_gene = df["Gene"] == gene
    sns.boxplot(
        data=df.loc[is_gene],
        x="Label",
        y="Average Gene Expression",
        hue="Cell Type",
        ax=ax,
        order=order,
        showfliers=False,
    )
    ax.set(ylabel=f"Average {gene}")

    return df



def rotate_xaxis(ax, rotation=90):
"""Rotates text by 90 degrees for x-axis"""
ax.set_xticks(ax.get_xticks())
Expand Down
9 changes: 8 additions & 1 deletion pf2/figures/figureA10.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from ..data_import import convert_to_patients, import_meta
from ..predict import predict_mortality
from .common import subplotLabel, getSetup

import seaborn as sns

def makeFigure():
"""Get a list of the axis objects and create a figure."""
Expand Down Expand Up @@ -100,10 +100,17 @@ def plot_plsr_scores(plsr_results, meta_data, labels, ax1, ax2):
meta_data.loc[:, "patient_category"] != "COVID-19"
]

pal = sns.color_palette()
if i == 0:
numb1=0; numb2=2
else:
numb1=1; numb2=3

sns.scatterplot(
x=plsr_results[i].x_scores_[:, 0],
y=plsr_results[i].x_scores_[:, 1],
hue=score_labels.to_numpy(),
palette=[pal[numb1], pal[numb2]],
ax=ax[i],
)

Expand Down
45 changes: 1 addition & 44 deletions pf2/figures/figureA11.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from matplotlib.axes import Axes
import anndata
from .common import subplotLabel, getSetup
from ..figures.commonFuncs.plotGeneral import bal_combine_bo_covid, rotate_xaxis, add_obs_cmp_both_label, add_obs_label
from ..figures.commonFuncs.plotGeneral import bal_combine_bo_covid, rotate_xaxis, add_obs_cmp_both_label, add_obs_label, plot_avegene_cmps
from ..data_import import add_obs, combine_cell_types
from .commonFuncs.plotFactors import bot_top_genes

Expand Down Expand Up @@ -40,46 +40,3 @@ def makeFigure():

return f



def plot_avegene_cmps(
    X: anndata.AnnData,
    gene: str,
    ax: Axes,
):
    """Boxplot the averaged expression of a single gene by label and cell type.

    The gene's expression is offset by ``var["means"]``, joined with the
    observation metadata, averaged per (Label, Gene, Condition, Cell Type)
    group and drawn on `ax`. The grouped data frame is returned.
    """
    status_cols = ("binary_outcome", "patient_category")
    gene_view = X[:, gene]

    frame = gene_view.to_df().subtract(gene_view.var["means"].values)
    for status in status_cols:
        frame[status] = gene_view.obs[status].values
    frame["Condition"] = gene_view.obs["sample_id"].values
    frame["Cell Type"] = gene_view.obs["combined_cell_type"].values
    frame["Label"] = gene_view.obs["Label"].values

    # Unlabelled cells are dropped before the outcome columns are combined.
    frame = bal_combine_bo_covid(
        frame.dropna(subset="Label"), status_cols[0], status_cols[1]
    )

    melted = pd.melt(
        frame, id_vars=["Label", "Condition", "Cell Type"], value_vars=gene
    )
    melted = melted.rename(columns={"variable": "Gene", "value": "Value"})

    group_keys = ["Label", "Gene", "Condition", "Cell Type"]
    df = melted.groupby(group_keys, observed=False).mean()
    df = df.rename(columns={"Value": "Average Gene Expression"}).reset_index()

    # No explicit x-axis order is passed; seaborn chooses the category order.
    sns.boxplot(
        data=df.loc[df["Gene"] == gene],
        x="Label",
        y="Average Gene Expression",
        hue="Cell Type",
        ax=ax,
        showfliers=False,
    )
    ax.set(ylabel=f"Average {gene}")

    return df
3 changes: 2 additions & 1 deletion pf2/figures/figureA13.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ def makeFigure():
subplotLabel(ax)

plot_toppfun(cmp=9, ax=ax[0])
plot_toppfun(cmp=27, ax=ax[1])
plot_toppfun(cmp=15, ax=ax[1])
# plot_toppfun(cmp=27, ax=ax[1])
plot_toppfun(cmp=46, ax=ax[2])

return f
Expand Down
164 changes: 164 additions & 0 deletions pf2/figures/figureA15.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
"""
Figure A15:
"""
from .commonFuncs.plotPaCMAP import plot_labels_pacmap
from ..data_import import combine_cell_types, add_obs
import anndata
from .common import subplotLabel, getSetup
import seaborn as sns
import matplotlib.colors as mcolors
import numpy as np
from .commonFuncs.plotFactors import bot_top_genes
from ..figures.commonFuncs.plotGeneral import bal_combine_bo_covid, rotate_xaxis, plot_avegene_cmps
import pandas as pd

from ..figures.commonFuncs.plotPaCMAP import plot_gene_pacmap


def makeFigure():
    """Build the figure: label PaCMAP plus per-gene boxplots and PaCMAPs.

    Cells are labelled by membership in the selected percentile of three
    components. The first axis shows the labels on the PaCMAP embedding;
    the following axes show, for the genes returned by `bot_top_genes` for
    each component, the average expression per label and the gene PaCMAP.
    """
    ax, f = getSetup((12, 12), (4, 4))
    subplotLabel(ax)

    X = anndata.read_h5ad("/opt/northwest_bal/full_fitted.h5ad")
    for obs_name in ("binary_outcome", "patient_category"):
        add_obs(X, obs_name)
    combine_cell_types(X)

    # Components of interest, the direction of each, and the percentile width.
    cmps = (7, 10, 47)
    directions = (True, True, True)
    threshold = 0.5
    X = add_obs_cmp_both_label_three(X, *cmps, *directions, top_perc=threshold)
    X = add_obs_label_three(X, *cmps)

    color_names = ("black", "fuchsia", "turquoise", "slateblue", "gainsboro")
    pal = [mcolors.CSS4_COLORS[name] for name in color_names]
    plot_labels_pacmap(X, "Label", ax[0], color_key=pal)

    genes = np.concatenate(
        [bot_top_genes(X, cmp=cmp, geneAmount=1) for cmp in cmps]
    )

    # Boxplots start at the second axis; gene PaCMAPs start at the eighth.
    for slot, gene in enumerate(genes, start=1):
        plot_avegene_cmps(X, gene, ax[slot])
        rotate_xaxis(ax[slot])

    for slot, gene in enumerate(genes, start=7):
        plot_gene_pacmap(gene, X, ax[slot])

    return f

def add_obs_cmp_both_label_three(
    X: "anndata.AnnData", cmp1: int, cmp2: int, cmp3: int, pos1=True, pos2=True, pos3=True, top_perc=1
):
    """Flag cells lying in the top/bottom percentile of three components.

    For every component a boolean column ``Cmp{n}`` is written to ``X.obs``
    and a combined column ``"Both"`` marks cells selected by all three.

    Parameters
    ----------
    X : AnnData with ``obsm["weighted_projections"]`` (cells x components).
    cmp1, cmp2, cmp3 : 1-based component numbers; column ``cmp - 1`` is read.
    pos1, pos2, pos3 : direction per component. Truthy selects cells above
        the ``100 - top_perc`` percentile, falsy selects cells below the
        ``top_perc`` percentile. Any truthy/falsy value is accepted
        (``True``, ``np.True_``, ``1`` ...).
    top_perc : width of the selected tail, in percent.

    Returns
    -------
    The same `X`, modified in place.
    """
    wprojs = X.obsm["weighted_projections"]

    in_all = None
    for cmp, positive in zip((cmp1, cmp2, cmp3), (pos1, pos2, pos3)):
        scores = wprojs[:, cmp - 1]
        if positive:
            cutoff = np.percentile(scores, 100 - top_perc)
            strict = scores > cutoff
            inclusive = scores >= cutoff
        else:
            cutoff = np.percentile(scores, top_perc)
            strict = scores < cutoff
            inclusive = scores <= cutoff

        # Per-component membership uses the strict comparison.
        X.obs[f"Cmp{cmp}"] = strict

        # The combined mask historically includes cells sitting exactly on
        # the cutoff, so the inclusive comparison is kept for it.
        in_all = inclusive if in_all is None else in_all & inclusive

    X.obs["Both"] = in_all

    return X


def add_obs_label_three(X: "anndata.AnnData", cmp1: int, cmp2: int, cmp3: int):
    """Assign a single ``"Label"`` per cell from three component flags.

    Reads the boolean ``Cmp{n}`` columns of ``X.obs`` and writes ``"Label"``:

    * ``"Cmp{n}"``  - the cell is flagged for that component only,
    * ``"Both"``    - the cell is flagged for all three components,
    * ``"NoLabel"`` - the cell is flagged for none of them.

    Cells flagged for exactly two components receive no label and are
    removed from the returned object.

    The label column is rebuilt from scratch, so a ``"Label"`` column left
    over from an earlier call cannot leak stale values into the result.

    Parameters
    ----------
    X : AnnData whose ``obs`` contains the three ``Cmp{n}`` boolean columns.
    cmp1, cmp2, cmp3 : component numbers used to form the column names.

    Returns
    -------
    `X` subset to the labelled cells.
    """
    names = [f"Cmp{cmp}" for cmp in (cmp1, cmp2, cmp3)]
    flags = np.column_stack([X.obs[name].to_numpy(dtype=bool) for name in names])
    n_hits = flags.sum(axis=1)

    # Start with every cell unlabelled; only the cases below get a value.
    labels = np.full(flags.shape[0], None, dtype=object)
    labels[n_hits == 0] = "NoLabel"
    labels[n_hits == 3] = "Both"
    for column, name in enumerate(names):
        labels[(n_hits == 1) & flags[:, column]] = name

    X.obs["Label"] = labels

    return X[X.obs["Label"].notna()]

2 changes: 1 addition & 1 deletion pf2/figures/figureA2.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def makeFigure():

X = read_h5ad("/opt/northwest_bal/full_fitted.h5ad", backed="r")

for i in range(1, 51):
for i in [24, 25, 49]:
plot_wp_pacmap(X, i, ax[i - 1], cbarMax=0.4)
# plot_wp_per_celltype(X, i, ax[i-1])

Expand Down

0 comments on commit 658d89b

Please sign in to comment.