Skip to content

Commit

Permalink
CRC analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
aarmey committed Aug 7, 2024
1 parent 888b96b commit a25ac00
Show file tree
Hide file tree
Showing 9 changed files with 1,622 additions and 416 deletions.
364 changes: 364 additions & 0 deletions GSE178341.ipynb

Large diffs are not rendered by default.

166 changes: 166 additions & 0 deletions Vanderbilt.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pf2rnaseq/figures/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
matplotlib.rcParams["ytick.major.pad"] = 1.0
matplotlib.rcParams["xtick.minor.pad"] = 0.9
matplotlib.rcParams["ytick.minor.pad"] = 0.9
matplotlib.rcParams["legend.handletextpad"] = 0.5
matplotlib.rcParams["legend.handletextpad"] = 0.4
matplotlib.rcParams["legend.handlelength"] = 0.5
matplotlib.rcParams["legend.framealpha"] = 0.5
matplotlib.rcParams["legend.markerscale"] = 0.7
Expand Down
26 changes: 9 additions & 17 deletions pf2rnaseq/figures/commonFuncs/plotFactors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,15 @@ def plot_condition_factors(
data: AnnData,
ax: Axes,
cond_group_labels: Optional[pd.Series] = None,
ThomsonNorm=False,
condition_label="Condition",
groupConditions=False,
):
"""Plots Pf2 condition factors"""
pd.set_option("display.max_rows", None)
yt = pd.Series(np.unique(data.obs["Condition"]))
yt = pd.Series(np.unique(data.obs[condition_label]))
X = np.array(data.uns["Pf2_A"])

X = np.log10(X)
if ThomsonNorm is True:
controls = yt.str.contains("CTRL")
X = X[controls]

X -= np.median(X, axis=0)
X /= np.std(X, axis=0)
Expand All @@ -46,14 +43,14 @@ def plot_condition_factors(
# extra padding to leave room for the row colors
# get list of colors for each label:
colors = sns.color_palette(
n_colors=pd.Series(cond_group_labels).nunique()
n_colors=cond_group_labels.nunique()
).as_hex()
lut = {}
legend_elements = []
for index, group in enumerate(pd.Series(cond_group_labels).unique()):
for index, group in enumerate(cond_group_labels.unique()):
lut[group] = colors[index]
legend_elements.append(Patch(color=colors[index], label=group))
row_colors = pd.Series(cond_group_labels).map(lut)
row_colors = cond_group_labels.map(lut)
for iii, color in enumerate(row_colors):
ax.add_patch(
plt.Rectangle(
Expand Down Expand Up @@ -135,24 +132,19 @@ def plot_gene_factors(data: AnnData, ax: Axes, trim=True):


def plot_gene_factors_partial(
    cmp: int, dataIn: AnnData, ax: Axes, geneAmount: int = 5
):
    """Plot the largest-magnitude gene-factor weights for one component.

    Genes are ranked by the absolute value of their weight in the selected
    component, so strongly negative and strongly positive genes compete for
    the same slots. The bars show the signed weights, ordered from largest
    to smallest magnitude.

    Args:
        cmp: 1-based component number; column ``cmp - 1`` of
            ``dataIn.varm["Pf2_C"]`` is plotted.
        dataIn: AnnData providing the gene factors in ``varm["Pf2_C"]`` and
            the gene names in ``var_names``.
        ax: Axes that receives the bar plot.
        geneAmount: Number of top-ranked genes to draw.

    Raises:
        ValueError: If ``cmp`` is smaller than 1.
    """
    # Components are numbered from 1. Without this guard cmp=0 (or a negative
    # value) would silently wrap around and plot a different component.
    if cmp < 1:
        raise ValueError(f"cmp is 1-based and must be >= 1, got {cmp}")

    cmpName = f"Cmp. {cmp}"

    df = pd.DataFrame(
        data=dataIn.varm["Pf2_C"][:, cmp - 1], index=dataIn.var_names, columns=[cmpName]
    )
    # Helper column used only for ranking; the plot itself uses signed weights.
    df["abs"] = df[cmpName].abs()
    df = df.reset_index(names="Gene")
    df = df.sort_values(by="abs", ascending=False)

    sns.barplot(data=df.iloc[:geneAmount, :], x="Gene", y=cmpName, color="k", ax=ax)
    # Gene names are long relative to the bar width, so print them vertically.
    ax.tick_params(axis="x", rotation=90)


Expand Down
2 changes: 1 addition & 1 deletion pf2rnaseq/figures/commonFuncs/plotPaCMAP.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def plot_gene_pacmap(gene: str, decompType: str, X: anndata.AnnData, ax: Axes):

# Color by values
values -= np.min(values)
values /= np.max(values)
values /= np.max(values) / 10.0
data["val_cat"] = values
result = tf.shade(
agg=canvas.points(data, "x", "y", agg=ds.mean("val_cat")),
Expand Down
12 changes: 4 additions & 8 deletions pf2rnaseq/figures/figureCITEseq4.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,12 @@
def makeFigure():
    """Build a figure of top gene-factor weights for components 20 through 49.

    One panel is drawn per component, in component order, each showing the
    15 highest-ranked genes returned by ``plot_gene_factors_partial``.

    Returns:
        The figure object produced by ``getSetup``.
    """
    # Get list of axis objects
    # NOTE(review): (6, 5) is presumably the rows x columns grid, giving the
    # 30 panels needed for the 30 components below; confirm against getSetup.
    ax, f = getSetup((10, 10), (6, 5))

    X = read_h5ad("/opt/extra-storage/CRC/GSE178341/crc10x_full_50cmp.h5ad", backed="r")
    comps = range(20, 50)

    for i, cmp in enumerate(comps):
        # enumerate() starts at 0, so ax[i] is the matching panel. The previous
        # ax[i - 1] put the first component on ax[-1] (the last panel) and
        # shifted every other component one panel early.
        plot_gene_factors_partial(cmp, X, ax[i], geneAmount=15)

    return f
11 changes: 11 additions & 0 deletions pf2rnaseq/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,17 @@ def import_HTAN() -> anndata.AnnData:
return prepare_dataset(X, "Condition", geneThreshold=0.1)


def import_Vanderbilt_scRNAseq() -> anndata.AnnData:
    """Load the Vanderbilt scRNAseq dataset and run it through prepare_dataset.

    The h5ad file is read from extra storage, and a new AnnData is assembled
    from the ``.raw`` matrix and ``.raw.var`` table together with the
    original ``obs``. The matrix is stored as CSR before the object is handed
    to ``prepare_dataset`` with "HTAN Specimen ID" and a gene threshold of
    0.01.
    """
    source = anndata.read_h5ad(
        "/opt/extra-storage/CRC/Heiser/VUMC_HTAN_DIS_EPI_V2.h5ad"
    )
    raw = source.raw

    # Rebuild from the raw layer: take the matrix and gene table stored under
    # .raw, keeping the per-cell annotations from the loaded object.
    dataset = anndata.AnnData(X=raw.X, obs=source.obs, var=raw.var)
    dataset.X = csr_matrix(dataset.X)

    # NOTE(review): the second argument is presumably the obs column that
    # identifies each condition/sample; confirm against prepare_dataset.
    return prepare_dataset(dataset, "HTAN Specimen ID", geneThreshold=0.01)


def import_CCLE() -> anndata.AnnData:
"""Imports barcoded cell data."""
# TODO: Still need to add gene names and barcodes.
Expand Down
Loading

0 comments on commit a25ac00

Please sign in to comment.