vib-singlecell-nf · dweemx · Mar 10, 2020 · Mar 6, 2020 · Mar 6, 2020 · Mar 10, 2020
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1 @@
+bin/reports/* linguist-vendored
diff --git a/bin/reports/sc_harmony_report.ipynb b/bin/reports/sc_harmony_report.ipynb
@@ -28,7 +28,7 @@
    "source": [
     "params = json.loads(WORKFLOW_PARAMETERS)\n",
     "bec_params = params[\"sc\"][\"harmony\"]\n",
-    "batch = \"batch\" if \"batch\" in bec_params[\"varsUse\"] else \"sample_id\""
+    "batch = bec_params[\"varsUse\"][0] if \"varsUse\" in bec_params else \"sample_id\""
    ]
   },
   {
@@ -149,20 +149,41 @@
    "outputs": [],
    "source": [
     "a = 0.6 # alpha setting\n",
-    "fig, ((ax1,ax2), (ax3,ax4),(ax5,ax6)) = plt.subplots(3,2, figsize=(10,10), dpi=150 )\n",
-    "sc.pl.tsne(adata1, color='sample_id', alpha=a, ax=ax1, show=False, wspace=0.5)\n",
-    "sc.pl.tsne(adata2, color='sample_id', alpha=a, ax=ax2, show=False, wspace=0.5)\n",
-    "sc.pl.tsne(adata1, color=batch, alpha=a, ax=ax3, show=False, wspace=0.5, title='batch')\n",
-    "sc.pl.tsne(adata2, color=batch, alpha=a, ax=ax4, show=False, wspace=0.5, title='batch')\n",
-    "sc.pl.tsne(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax5, show=False, wspace=0.5)\n",
-    "sc.pl.tsne(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax6, show=False, wspace=0.5)\n",
+    "number_of_subplots=len(annotations_to_plot)\n",
+    "fig, (axs) = plt.subplots(number_of_subplots,2, figsize=(10,5*number_of_subplots), dpi=150 )\n",
     "\n",
-    "ax1.set_title('Pre-batch correction (sample)')\n",
-    "ax2.set_title('Post-batch correction (sample)')\n",
-    "ax3.set_title('Pre-batch correction (batch)')\n",
-    "ax4.set_title('Post-batch correction (batch)')\n",
-    "ax5.set_title(f'Pre-batch correction ({clustering_algorithm.capitalize()})')\n",
-    "ax6.set_title(f'Post-batch correction ({clustering_algorithm.capitalize()})')\n",
+    "for i,v in enumerate(range(number_of_subplots)):\n",
+    "    annotation_to_plot = annotations_to_plot[i]\n",
+    "    ax1 = axs[0] if number_of_subplots == 1 else axs[i%2][0]\n",
+    "    ax1 = sc.pl.tsne(adata1, color=annotation_to_plot, alpha=a, ax=ax1, show=False, wspace=0.5)\n",
+    "    ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
+    "    ax1.set_title(f\"Pre-batch correction ({annotation_to_plot})\")\n",
+    "    ax2 = axs[1] if number_of_subplots == 1 else axs[i%2][1]\n",
+    "    ax2 = sc.pl.tsne(adata2, color=annotation_to_plot, alpha=a, ax=ax2, show=False, wspace=0.5)\n",
+    "    ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
+    "    ax2.set_title(f\"Post-batch correction ({annotation_to_plot})\")\n",
+    "\n",
+    "plt.tight_layout()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, ((ax1,ax2), (ax3,ax4)) = plt.subplots(2,2, figsize=(10,10), dpi=150 )\n",
+    "ax1 = sc.pl.tsne(adata1, color=batch, alpha=a, ax=ax1, show=False, wspace=0.5, title='batch')\n",
+    "ax2 = sc.pl.tsne(adata2, color=batch, alpha=a, ax=ax2, show=False, wspace=0.5, title='batch')\n",
+    "ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
+    "ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
+    "sc.pl.tsne(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax3, show=False, wspace=0.5)\n",
+    "sc.pl.tsne(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax4, show=False, wspace=0.5)\n",
+    "\n",
+    "ax1.set_title('Pre-batch correction (batch)')\n",
+    "ax2.set_title('Post-batch correction (batch)')\n",
+    "ax3.set_title(f'Pre-batch correction ({clustering_algorithm.capitalize()})')\n",
+    "ax4.set_title(f'Post-batch correction ({clustering_algorithm.capitalize()})')\n",
     "#\n",
     "plt.tight_layout()"
    ]
@@ -181,20 +202,41 @@
    "outputs": [],
    "source": [
     "a = 0.6 # alpha setting\n",
-    "fig, ((ax1,ax2), (ax3,ax4),(ax5,ax6)) = plt.subplots(3,2, figsize=(10,10), dpi=150 )\n",
-    "sc.pl.umap(adata1, color='sample_id', alpha=a, ax=ax1, show=False, wspace=0.5)\n",
-    "sc.pl.umap(adata2, color='sample_id', alpha=a, ax=ax2, show=False, wspace=0.5)\n",
-    "sc.pl.umap(adata1, color=batch, alpha=a, ax=ax3, show=False, wspace=0.5, title='batch')\n",
-    "sc.pl.umap(adata2, color=batch, alpha=a, ax=ax4, show=False, wspace=0.5, title='batch')\n",
-    "sc.pl.umap(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax5, show=False, wspace=0.5)\n",
-    "sc.pl.umap(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax6, show=False, wspace=0.5)\n",
+    "number_of_subplots=len(annotations_to_plot)\n",
+    "fig, (axs) = plt.subplots(number_of_subplots,2, figsize=(10,5*number_of_subplots), dpi=150 )\n",
+    "\n",
+    "for i,v in enumerate(range(number_of_subplots)):\n",
+    "    annotation_to_plot = annotations_to_plot[i]\n",
+    "    ax1 = axs[0] if number_of_subplots == 1 else axs[i%2][0]\n",
+    "    ax1 = sc.pl.umap(adata1, color=annotation_to_plot, alpha=a, ax=ax1, show=False, wspace=0.5)\n",
+    "    ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
+    "    ax1.set_title(f\"Pre-batch correction ({annotation_to_plot})\")\n",
+    "    ax2 = axs[1] if number_of_subplots == 1 else axs[i%2][1]\n",
+    "    ax2 = sc.pl.umap(adata2, color=annotation_to_plot, alpha=a, ax=ax2, show=False, wspace=0.5)\n",
+    "    ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
+    "    ax2.set_title(f\"Post-batch correction ({annotation_to_plot})\")\n",
     "\n",
-    "ax1.set_title('Pre-batch correction (sample)')\n",
-    "ax2.set_title('Post-batch correction (sample)')\n",
-    "ax3.set_title('Pre-batch correction (batch)')\n",
-    "ax4.set_title('Post-batch correction (batch)')\n",
-    "ax5.set_title(f'Pre-batch correction ({clustering_algorithm.capitalize()})')\n",
-    "ax6.set_title(f'Post-batch correction ({clustering_algorithm.capitalize()})')\n",
+    "plt.tight_layout()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, ((ax1,ax2), (ax3,ax4)) = plt.subplots(2,2, figsize=(10,10), dpi=150 )\n",
+    "ax1 = sc.pl.umap(adata1, color=batch, alpha=a, ax=ax1, show=False, wspace=0.5, title='batch')\n",
+    "ax2 = sc.pl.umap(adata2, color=batch, alpha=a, ax=ax2, show=False, wspace=0.5, title='batch')\n",
+    "ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
+    "ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n",
+    "sc.pl.umap(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax3, show=False, wspace=0.5)\n",
+    "sc.pl.umap(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax4, show=False, wspace=0.5)\n",
+    "\n",
+    "ax1.set_title('Pre-batch correction (batch)')\n",
+    "ax2.set_title('Post-batch correction (batch)')\n",
+    "ax3.set_title(f'Pre-batch correction ({clustering_algorithm.capitalize()})')\n",
+    "ax4.set_title(f'Post-batch correction ({clustering_algorithm.capitalize()})')\n",
     "#\n",
     "plt.tight_layout()"
    ]
@@ -214,10 +256,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "fig, ((ax1,ax2),(ax3,ax4)) = plt.subplots(2,2, figsize=(15,8), dpi=150 )\n",
-    "barPlotByAnnotation(adata1.obs, axis1=ax1, axis2=ax3, title=\"Pre-batch correction (sample)\", clustering_algorithm=clustering_algorithm, annotation=\"sample_id\")\n",
-    "barPlotByAnnotation(adata2.obs, axis1=ax2, axis2=ax4, title=\"Post-batch correction (sample)\", clustering_algorithm=clustering_algorithm, annotation=\"sample_id\")\n",
-    "plt.tight_layout()"
+    "for annotation_to_plot in annotations_to_plot:\n",
+    "    fig, ((ax1,ax2),(ax3,ax4)) = plt.subplots(2,2, figsize=(15,8), dpi=150 )\n",
+    "    barPlotByAnnotation(adata1.obs, axis1=ax1, axis2=ax3, title=\"Pre-batch correction (sample)\", clustering_algorithm=clustering_algorithm, annotation=annotation_to_plot)\n",
+    "    barPlotByAnnotation(adata2.obs, axis1=ax2, axis2=ax4, title=\"Post-batch correction (sample)\", clustering_algorithm=clustering_algorithm, annotation=annotation_to_plot)\n",
+    "    plt.tight_layout()"
    ]
   },
   {

diff --git a/bin/run_harmony.R b/bin/run_harmony.R
@@ -52,11 +52,22 @@ if(is.null(args$`vars-use`)) {
 input_ext <- tools::file_ext(args$`input-file`)
 
 if(input_ext == "h5ad") {
-  seurat <- Seurat::ReadH5AD(file = args$`input-file`)
-  if(!("pca" %in% names(seurat@reductions)) || is.null(x = seurat@reductions$pca))
-    stop("Expects a PCA embeddings data matrix but it does not exist.")
-  data <- seurat@reductions$pca
-  metadata <- seurat@meta.data
+  # Current fix until https://github.com/satijalab/seurat/issues/2485 is fixed
+  file <- hdf5r::h5file(filename = args$`input-file`, mode = 'r')
+  if(!("X_pca" %in% names(x = file[["obsm"]]))) {
+    stop("HI")
+  }
+  obs <- file[['obs']][]
+  pca_embeddings <- t(x = file[["obsm"]][["X_pca"]][,])
+  row.names(x = pca_embeddings) <- obs$index
+  colnames(x = pca_embeddings) <- paste0("PCA_", seq(from = 1, to = ncol(x = pca_embeddings)))
+  metadata <- obs
+  # seurat <- Seurat::ReadH5AD(file = args$`input-file`)
+  # if(!("pca" %in% names(seurat@reductions)) || is.null(x = seurat@reductions$pca))
+  #   stop("Expects a PCA embeddings data matrix but it does not exist.")
+  # data <- seurat@reductions$pca
+  # pca_embeddings <- data@cell.embeddings
+  # metadata <- seurat@meta.data
 } else {
   stop(paste0("Unrecognized input file format: ", input_ext, "."))
 }
@@ -68,7 +79,8 @@ if(sum(args$`vars-use` %in% colnames(x = metadata)) != length(x = args$`vars-use
 }
 
 # Run Harmony
-harmony_embeddings <- harmony::HarmonyMatrix(data_mat = data@cell.embeddings
+# Expects PCA matrix (Cells as rows and PCs as columns.)
+harmony_embeddings <- harmony::HarmonyMatrix(data_mat = pca_embeddings
                                              , meta_data = metadata
                                              , vars_use = args$`vars-use`
                                              , do_pca = args$`do-pca`