Skip to content
This repository has been archived by the owner on Dec 8, 2020. It is now read-only.

Develop #5

Merged
merged 3 commits into from
Mar 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bin/reports/* linguist-vendored
105 changes: 74 additions & 31 deletions bin/reports/sc_harmony_report.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"source": [
"params = json.loads(WORKFLOW_PARAMETERS)\n",
"bec_params = params[\"sc\"][\"harmony\"]\n",
"batch = \"batch\" if \"batch\" in bec_params[\"varsUse\"] else \"sample_id\""
"batch = bec_params[\"varsUse\"][0] if \"varsUse\" in bec_params else \"sample_id\""
]
},
{
Expand Down Expand Up @@ -149,20 +149,41 @@
"outputs": [],
"source": [
"a = 0.6 # alpha setting\n",
"fig, ((ax1,ax2), (ax3,ax4),(ax5,ax6)) = plt.subplots(3,2, figsize=(10,10), dpi=150 )\n",
"sc.pl.tsne(adata1, color='sample_id', alpha=a, ax=ax1, show=False, wspace=0.5)\n",
"sc.pl.tsne(adata2, color='sample_id', alpha=a, ax=ax2, show=False, wspace=0.5)\n",
"sc.pl.tsne(adata1, color=batch, alpha=a, ax=ax3, show=False, wspace=0.5, title='batch')\n",
"sc.pl.tsne(adata2, color=batch, alpha=a, ax=ax4, show=False, wspace=0.5, title='batch')\n",
"sc.pl.tsne(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax5, show=False, wspace=0.5)\n",
"sc.pl.tsne(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax6, show=False, wspace=0.5)\n",
"number_of_subplots=len(annotations_to_plot)\n",
"fig, (axs) = plt.subplots(number_of_subplots,2, figsize=(10,5*number_of_subplots), dpi=150 )\n",
"\n",
"ax1.set_title('Pre-batch correction (sample)')\n",
"ax2.set_title('Post-batch correction (sample)')\n",
"ax3.set_title('Pre-batch correction (batch)')\n",
"ax4.set_title('Post-batch correction (batch)')\n",
"ax5.set_title(f'Pre-batch correction ({clustering_algorithm.capitalize()})')\n",
"ax6.set_title(f'Post-batch correction ({clustering_algorithm.capitalize()})')\n",
"for i,v in enumerate(range(number_of_subplots)):\n",
" annotation_to_plot = annotations_to_plot[i]\n",
" ax1 = axs[0] if number_of_subplots == 1 else axs[i%2][0]\n",
" ax1 = sc.pl.tsne(adata1, color=annotation_to_plot, alpha=a, ax=ax1, show=False, wspace=0.5)\n",
" ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
" ax1.set_title(f\"Pre-batch correction ({annotation_to_plot})\")\n",
" ax2 = axs[1] if number_of_subplots == 1 else axs[i%2][1]\n",
" ax2 = sc.pl.tsne(adata2, color=annotation_to_plot, alpha=a, ax=ax2, show=False, wspace=0.5)\n",
" ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
" ax2.set_title(f\"Post-batch correction ({annotation_to_plot})\")\n",
"\n",
"plt.tight_layout()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, ((ax1,ax2), (ax3,ax4)) = plt.subplots(2,2, figsize=(10,10), dpi=150 )\n",
"ax1 = sc.pl.tsne(adata1, color=batch, alpha=a, ax=ax1, show=False, wspace=0.5, title='batch')\n",
"ax2 = sc.pl.tsne(adata2, color=batch, alpha=a, ax=ax2, show=False, wspace=0.5, title='batch')\n",
"ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
"ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
"sc.pl.tsne(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax3, show=False, wspace=0.5)\n",
"sc.pl.tsne(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax4, show=False, wspace=0.5)\n",
"\n",
"ax1.set_title('Pre-batch correction (batch)')\n",
"ax2.set_title('Post-batch correction (batch)')\n",
"ax3.set_title(f'Pre-batch correction ({clustering_algorithm.capitalize()})')\n",
"ax4.set_title(f'Post-batch correction ({clustering_algorithm.capitalize()})')\n",
"#\n",
"plt.tight_layout()"
]
Expand All @@ -181,20 +202,41 @@
"outputs": [],
"source": [
"a = 0.6 # alpha setting\n",
"fig, ((ax1,ax2), (ax3,ax4),(ax5,ax6)) = plt.subplots(3,2, figsize=(10,10), dpi=150 )\n",
"sc.pl.umap(adata1, color='sample_id', alpha=a, ax=ax1, show=False, wspace=0.5)\n",
"sc.pl.umap(adata2, color='sample_id', alpha=a, ax=ax2, show=False, wspace=0.5)\n",
"sc.pl.umap(adata1, color=batch, alpha=a, ax=ax3, show=False, wspace=0.5, title='batch')\n",
"sc.pl.umap(adata2, color=batch, alpha=a, ax=ax4, show=False, wspace=0.5, title='batch')\n",
"sc.pl.umap(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax5, show=False, wspace=0.5)\n",
"sc.pl.umap(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax6, show=False, wspace=0.5)\n",
"number_of_subplots=len(annotations_to_plot)\n",
"fig, (axs) = plt.subplots(number_of_subplots,2, figsize=(10,5*number_of_subplots), dpi=150 )\n",
"\n",
"for i,v in enumerate(range(number_of_subplots)):\n",
" annotation_to_plot = annotations_to_plot[i]\n",
" ax1 = axs[0] if number_of_subplots == 1 else axs[i%2][0]\n",
" ax1 = sc.pl.umap(adata1, color=annotation_to_plot, alpha=a, ax=ax1, show=False, wspace=0.5)\n",
" ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
" ax1.set_title(f\"Pre-batch correction ({annotation_to_plot})\")\n",
" ax2 = axs[1] if number_of_subplots == 1 else axs[i%2][1]\n",
" ax2 = sc.pl.umap(adata2, color=annotation_to_plot, alpha=a, ax=ax2, show=False, wspace=0.5)\n",
" ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
" ax2.set_title(f\"Post-batch correction ({annotation_to_plot})\")\n",
"\n",
"ax1.set_title('Pre-batch correction (sample)')\n",
"ax2.set_title('Post-batch correction (sample)')\n",
"ax3.set_title('Pre-batch correction (batch)')\n",
"ax4.set_title('Post-batch correction (batch)')\n",
"ax5.set_title(f'Pre-batch correction ({clustering_algorithm.capitalize()})')\n",
"ax6.set_title(f'Post-batch correction ({clustering_algorithm.capitalize()})')\n",
"plt.tight_layout()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, ((ax1,ax2), (ax3,ax4)) = plt.subplots(2,2, figsize=(10,10), dpi=150 )\n",
"ax1 = sc.pl.umap(adata1, color=batch, alpha=a, ax=ax1, show=False, wspace=0.5, title='batch')\n",
"ax2 = sc.pl.umap(adata2, color=batch, alpha=a, ax=ax2, show=False, wspace=0.5, title='batch')\n",
"ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
"ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
"sc.pl.umap(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax3, show=False, wspace=0.5)\n",
"sc.pl.umap(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax4, show=False, wspace=0.5)\n",
"\n",
"ax1.set_title('Pre-batch correction (batch)')\n",
"ax2.set_title('Post-batch correction (batch)')\n",
"ax3.set_title(f'Pre-batch correction ({clustering_algorithm.capitalize()})')\n",
"ax4.set_title(f'Post-batch correction ({clustering_algorithm.capitalize()})')\n",
"#\n",
"plt.tight_layout()"
]
Expand All @@ -214,10 +256,11 @@
"metadata": {},
"outputs": [],
"source": [
"fig, ((ax1,ax2),(ax3,ax4)) = plt.subplots(2,2, figsize=(15,8), dpi=150 )\n",
"barPlotByAnnotation(adata1.obs, axis1=ax1, axis2=ax3, title=\"Pre-batch correction (sample)\", clustering_algorithm=clustering_algorithm, annotation=\"sample_id\")\n",
"barPlotByAnnotation(adata2.obs, axis1=ax2, axis2=ax4, title=\"Post-batch correction (sample)\", clustering_algorithm=clustering_algorithm, annotation=\"sample_id\")\n",
"plt.tight_layout()"
"for annotation_to_plot in annotations_to_plot:\n",
" fig, ((ax1,ax2),(ax3,ax4)) = plt.subplots(2,2, figsize=(15,8), dpi=150 )\n",
" barPlotByAnnotation(adata1.obs, axis1=ax1, axis2=ax3, title=\"Pre-batch correction (sample)\", clustering_algorithm=clustering_algorithm, annotation=annotation_to_plot)\n",
" barPlotByAnnotation(adata2.obs, axis1=ax2, axis2=ax4, title=\"Post-batch correction (sample)\", clustering_algorithm=clustering_algorithm, annotation=annotation_to_plot)\n",
" plt.tight_layout()"
]
},
{
Expand Down
24 changes: 18 additions & 6 deletions bin/run_harmony.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,22 @@ if(is.null(args$`vars-use`)) {
input_ext <- tools::file_ext(args$`input-file`)

if(input_ext == "h5ad") {
seurat <- Seurat::ReadH5AD(file = args$`input-file`)
if(!("pca" %in% names(seurat@reductions)) || is.null(x = seurat@reductions$pca))
stop("Expects a PCA embeddings data matrix but it does not exist.")
data <- seurat@reductions$pca
metadata <- seurat@meta.data
# Current fix until https://github.com/satijalab/seurat/issues/2485 is fixed
file <- hdf5r::h5file(filename = args$`input-file`, mode = 'r')
if(!("X_pca" %in% names(x = file[["obsm"]]))) {
stop("HI")
}
obs <- file[['obs']][]
pca_embeddings <- t(x = file[["obsm"]][["X_pca"]][,])
row.names(x = pca_embeddings) <- obs$index
colnames(x = pca_embeddings) <- paste0("PCA_", seq(from = 1, to = ncol(x = pca_embeddings)))
metadata <- obs
# seurat <- Seurat::ReadH5AD(file = args$`input-file`)
# if(!("pca" %in% names(seurat@reductions)) || is.null(x = seurat@reductions$pca))
# stop("Expects a PCA embeddings data matrix but it does not exist.")
# data <- seurat@reductions$pca
# pca_embeddings <- data@cell.embeddings
# metadata <- seurat@meta.data
} else {
stop(paste0("Unrecognized input file format: ", input_ext, "."))
}
Expand All @@ -68,7 +79,8 @@ if(sum(args$`vars-use` %in% colnames(x = metadata)) != length(x = args$`vars-use
}

# Run Harmony
harmony_embeddings <- harmony::HarmonyMatrix(data_mat = data@cell.embeddings
# Expects PCA matrix (Cells as rows and PCs as columns.)
harmony_embeddings <- harmony::HarmonyMatrix(data_mat = pca_embeddings
, meta_data = metadata
, vars_use = args$`vars-use`
, do_pca = args$`do-pca`
Expand Down