diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..1ea8f89 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +bin/reports/* linguist-vendored diff --git a/bin/reports/sc_harmony_report.ipynb b/bin/reports/sc_harmony_report.ipynb index 7816d8b..8cec786 100644 --- a/bin/reports/sc_harmony_report.ipynb +++ b/bin/reports/sc_harmony_report.ipynb @@ -28,7 +28,7 @@ "source": [ "params = json.loads(WORKFLOW_PARAMETERS)\n", "bec_params = params[\"sc\"][\"harmony\"]\n", - "batch = \"batch\" if \"batch\" in bec_params[\"varsUse\"] else \"sample_id\"" + "batch = bec_params[\"varsUse\"][0] if \"varsUse\" in bec_params else \"sample_id\"" ] }, { @@ -149,20 +149,41 @@ "outputs": [], "source": [ "a = 0.6 # alpha setting\n", - "fig, ((ax1,ax2), (ax3,ax4),(ax5,ax6)) = plt.subplots(3,2, figsize=(10,10), dpi=150 )\n", - "sc.pl.tsne(adata1, color='sample_id', alpha=a, ax=ax1, show=False, wspace=0.5)\n", - "sc.pl.tsne(adata2, color='sample_id', alpha=a, ax=ax2, show=False, wspace=0.5)\n", - "sc.pl.tsne(adata1, color=batch, alpha=a, ax=ax3, show=False, wspace=0.5, title='batch')\n", - "sc.pl.tsne(adata2, color=batch, alpha=a, ax=ax4, show=False, wspace=0.5, title='batch')\n", - "sc.pl.tsne(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax5, show=False, wspace=0.5)\n", - "sc.pl.tsne(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax6, show=False, wspace=0.5)\n", + "number_of_subplots=len(annotations_to_plot)\n", + "fig, (axs) = plt.subplots(number_of_subplots,2, figsize=(10,5*number_of_subplots), dpi=150 )\n", "\n", - "ax1.set_title('Pre-batch correction (sample)')\n", - "ax2.set_title('Post-batch correction (sample)')\n", - "ax3.set_title('Pre-batch correction (batch)')\n", - "ax4.set_title('Post-batch correction (batch)')\n", - "ax5.set_title(f'Pre-batch correction ({clustering_algorithm.capitalize()})')\n", - "ax6.set_title(f'Post-batch correction ({clustering_algorithm.capitalize()})')\n", + "for i,v in enumerate(range(number_of_subplots)):\n", + " annotation_to_plot = annotations_to_plot[i]\n", + " ax1 = axs[0] if number_of_subplots == 1 else axs[i%2][0]\n", + " ax1 = sc.pl.tsne(adata1, color=annotation_to_plot, alpha=a, ax=ax1, show=False, wspace=0.5)\n", + " ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", + " ax1.set_title(f\"Pre-batch correction ({annotation_to_plot})\")\n", + " ax2 = axs[1] if number_of_subplots == 1 else axs[i%2][1]\n", + " ax2 = sc.pl.tsne(adata2, color=annotation_to_plot, alpha=a, ax=ax2, show=False, wspace=0.5)\n", + " ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", + " ax2.set_title(f\"Post-batch correction ({annotation_to_plot})\")\n", + "\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ((ax1,ax2), (ax3,ax4)) = plt.subplots(2,2, figsize=(10,10), dpi=150 )\n", + "ax1 = sc.pl.tsne(adata1, color=batch, alpha=a, ax=ax1, show=False, wspace=0.5, title='batch')\n", + "ax2 = sc.pl.tsne(adata2, color=batch, alpha=a, ax=ax2, show=False, wspace=0.5, title='batch')\n", + "ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", + "ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", + "sc.pl.tsne(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax3, show=False, wspace=0.5)\n", + "sc.pl.tsne(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax4, show=False, wspace=0.5)\n", + "\n", + "ax1.set_title('Pre-batch correction (batch)')\n", + "ax2.set_title('Post-batch correction (batch)')\n", + "ax3.set_title(f'Pre-batch correction ({clustering_algorithm.capitalize()})')\n", + "ax4.set_title(f'Post-batch correction ({clustering_algorithm.capitalize()})')\n", "#\n", "plt.tight_layout()" ] @@ -181,20 +202,41 @@ "outputs": [], "source": [ "a = 0.6 # alpha setting\n", - "fig, ((ax1,ax2), (ax3,ax4),(ax5,ax6)) = plt.subplots(3,2, figsize=(10,10), dpi=150 )\n", - "sc.pl.umap(adata1, color='sample_id', alpha=a, ax=ax1, show=False, wspace=0.5)\n", - "sc.pl.umap(adata2, color='sample_id', alpha=a, ax=ax2, show=False, wspace=0.5)\n", - "sc.pl.umap(adata1, color=batch, alpha=a, ax=ax3, show=False, wspace=0.5, title='batch')\n", - "sc.pl.umap(adata2, color=batch, alpha=a, ax=ax4, show=False, wspace=0.5, title='batch')\n", - "sc.pl.umap(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax5, show=False, wspace=0.5)\n", - "sc.pl.umap(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax6, show=False, wspace=0.5)\n", + "number_of_subplots=len(annotations_to_plot)\n", + "fig, (axs) = plt.subplots(number_of_subplots,2, figsize=(10,5*number_of_subplots), dpi=150 )\n", + "\n", + "for i,v in enumerate(range(number_of_subplots)):\n", + " annotation_to_plot = annotations_to_plot[i]\n", + " ax1 = axs[0] if number_of_subplots == 1 else axs[i%2][0]\n", + " ax1 = sc.pl.umap(adata1, color=annotation_to_plot, alpha=a, ax=ax1, show=False, wspace=0.5)\n", + " ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", + " ax1.set_title(f\"Pre-batch correction ({annotation_to_plot})\")\n", + " ax2 = axs[1] if number_of_subplots == 1 else axs[i%2][1]\n", + " ax2 = sc.pl.umap(adata2, color=annotation_to_plot, alpha=a, ax=ax2, show=False, wspace=0.5)\n", + " ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", + " ax2.set_title(f\"Post-batch correction ({annotation_to_plot})\")\n", "\n", - "ax1.set_title('Pre-batch correction (sample)')\n", - "ax2.set_title('Post-batch correction (sample)')\n", - "ax3.set_title('Pre-batch correction (batch)')\n", - "ax4.set_title('Post-batch correction (batch)')\n", - "ax5.set_title(f'Pre-batch correction ({clustering_algorithm.capitalize()})')\n", - "ax6.set_title(f'Post-batch correction ({clustering_algorithm.capitalize()})')\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ((ax1,ax2), (ax3,ax4)) = plt.subplots(2,2, figsize=(10,10), dpi=150 )\n", + "ax1 = sc.pl.umap(adata1, color=batch, alpha=a, ax=ax1, show=False, wspace=0.5, title='batch')\n", + "ax2 = sc.pl.umap(adata2, color=batch, alpha=a, ax=ax2, show=False, wspace=0.5, title='batch')\n", + "ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", + "ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", + "sc.pl.umap(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax3, show=False, wspace=0.5)\n", + "sc.pl.umap(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax4, show=False, wspace=0.5)\n", + "\n", + "ax1.set_title('Pre-batch correction (batch)')\n", + "ax2.set_title('Post-batch correction (batch)')\n", + "ax3.set_title(f'Pre-batch correction ({clustering_algorithm.capitalize()})')\n", + "ax4.set_title(f'Post-batch correction ({clustering_algorithm.capitalize()})')\n", "#\n", "plt.tight_layout()" ] @@ -214,10 +256,11 @@ "metadata": {}, "outputs": [], "source": [ - "fig, ((ax1,ax2),(ax3,ax4)) = plt.subplots(2,2, figsize=(15,8), dpi=150 )\n", - "barPlotByAnnotation(adata1.obs, axis1=ax1, axis2=ax3, title=\"Pre-batch correction (sample)\", clustering_algorithm=clustering_algorithm, annotation=\"sample_id\")\n", - "barPlotByAnnotation(adata2.obs, axis1=ax2, axis2=ax4, title=\"Post-batch correction (sample)\", clustering_algorithm=clustering_algorithm, annotation=\"sample_id\")\n", - "plt.tight_layout()" + "for annotation_to_plot in annotations_to_plot:\n", + " fig, ((ax1,ax2),(ax3,ax4)) = plt.subplots(2,2, figsize=(15,8), dpi=150 )\n", + " barPlotByAnnotation(adata1.obs, axis1=ax1, axis2=ax3, title=\"Pre-batch correction (sample)\", clustering_algorithm=clustering_algorithm, annotation=annotation_to_plot)\n", + " barPlotByAnnotation(adata2.obs, axis1=ax2, axis2=ax4, title=\"Post-batch correction (sample)\", clustering_algorithm=clustering_algorithm, annotation=annotation_to_plot)\n", + " plt.tight_layout()" ] }, { diff --git a/bin/run_harmony.R b/bin/run_harmony.R index 269b1ac..8d51000 100755 --- a/bin/run_harmony.R +++ b/bin/run_harmony.R @@ -52,11 +52,22 @@ if(is.null(args$`vars-use`)) { input_ext <- tools::file_ext(args$`input-file`) if(input_ext == "h5ad") { - seurat <- Seurat::ReadH5AD(file = args$`input-file`) - if(!("pca" %in% names(seurat@reductions)) || is.null(x = seurat@reductions$pca)) - stop("Expects a PCA embeddings data matrix but it does not exist.") - data <- seurat@reductions$pca - metadata <- seurat@meta.data + # Current fix until https://github.com/satijalab/seurat/issues/2485 is fixed + file <- hdf5r::h5file(filename = args$`input-file`, mode = 'r') + if(!("X_pca" %in% names(x = file[["obsm"]]))) { + stop("HI") + } + obs <- file[['obs']][] + pca_embeddings <- t(x = file[["obsm"]][["X_pca"]][,]) + row.names(x = pca_embeddings) <- obs$index + colnames(x = pca_embeddings) <- paste0("PCA_", seq(from = 1, to = ncol(x = pca_embeddings))) + metadata <- obs + # seurat <- Seurat::ReadH5AD(file = args$`input-file`) + # if(!("pca" %in% names(seurat@reductions)) || is.null(x = seurat@reductions$pca)) + # stop("Expects a PCA embeddings data matrix but it does not exist.") + # data <- seurat@reductions$pca + # pca_embeddings <- data@cell.embeddings + # metadata <- seurat@meta.data } else { stop(paste0("Unrecognized input file format: ", input_ext, ".")) } @@ -68,7 +79,8 @@ if(sum(args$`vars-use` %in% colnames(x = metadata)) != length(x = args$`vars-use } # Run Harmony -harmony_embeddings <- harmony::HarmonyMatrix(data_mat = data@cell.embeddings +# Expects PCA matrix (Cells as rows and PCs as columns.) +harmony_embeddings <- harmony::HarmonyMatrix(data_mat = pca_embeddings , meta_data = metadata , vars_use = args$`vars-use` , do_pca = args$`do-pca`