From 788d07628bd63895929830a5520b29391f99d629 Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Thu, 9 Nov 2023 16:11:26 -0600 Subject: [PATCH] A few more improvements to the vignette. --- DESCRIPTION | 4 ++-- vignettes/intro_fastglmpca.Rmd | 43 ++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index acf3ce7..d215c7e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Encoding: UTF-8 Type: Package Package: fastglmpca -Version: 0.1-75 -Date: 2023-10-25 +Version: 0.1-76 +Date: 2023-11-09 Title: Fast Algorithms for Generalized Principal Component Analysis Authors@R: c(person("Eric","Weine",role=c("aut","cre"), email="ericweine15@gmail.com"), diff --git a/vignettes/intro_fastglmpca.Rmd b/vignettes/intro_fastglmpca.Rmd index e95a5f8..497f296 100644 --- a/vignettes/intro_fastglmpca.Rmd +++ b/vignettes/intro_fastglmpca.Rmd @@ -140,27 +140,34 @@ ggplot(pdat,aes(x = obs,y = fitted)) + theme_cowplot(font_size = 12) ``` -Most users will be interested in `fit$U` and `fit$V`, the orthogonaal -matrices containing the latent representation of the data. As a basic -example, we can extract the loadings for each cell in the first and -second directions of variation as follows: - -```{r} -pc_df <- data.frame( - celltype = pbmc_facs$samples$celltype, - PC1 = fit$V[,1], - PC2 = fit$V[,2] -) -``` - -Then, we can visualize these loadings, coloring each point by cell type. - -```{r plot-V, fig.height=3, fig.width=5} -ggplot(pc_df,aes(x = PC1,y = PC2,color = celltype)) + - geom_point(size = 1.25) + +The U and V outputs in particular are interesting because they give +low-dimensional (in this case, 2-d) embeddings of the genes and cells, +respectively. 
Let's compare this 2-d embedding of the cells to the +provided cell-type labels: + +```{r plot-v, fig.height=3, fig.width=4.5} +celltype_colors <- c("forestgreen","dodgerblue","darkmagenta", + "gray","hotpink","red") +celltype <- as.character(pbmc_facs$samples$celltype) +celltype[celltype == "CD4+/CD25 T Reg" | + celltype == "CD4+ T Helper2" | + celltype == "CD8+/CD45RA+ Naive Cytotoxic" | + celltype == "CD4+/CD45RA+/CD25- Naive T" | + celltype == "CD4+/CD45RO+ Memory"] <- "T cell" +celltype <- factor(celltype) +pdat <- data.frame(celltype = celltype, + pc1 = fit$V[,1], + pc2 = fit$V[,2]) +ggplot(pdat,aes(x = pc1,y = pc2,color = celltype)) + + geom_point() + + scale_color_manual(values = celltype_colors) + theme_cowplot(font_size = 10) ``` +The 2-d embedding clearly separates the CD34+ and CD14+ cells from the +others, and somewhat distinguishes the other cell types (B cells, T +cells, NK cells). + Session info ------------