content

Cell marker genes

Download /gdp from step7 on your working path WDP, cell marker genes and seurat object are the de_genes and seu.int, respectively

WDP='/Volumes/F/for_my_website/scrnabox0/related_to_scrnaR'
setwd(WDP)
mk_genes<-readRDS(paste0(WDP, "/gdp/de_genes.rds"))
seu.int<-readRDS(paste0(WDP, "/gdp/seu.int.c.rds"))

Select top 2

mk_genes %>%
    group_by(cluster) %>%
    top_n(-2, p_val_adj) -> top_sel
top_sel
## # A tibble: 15 × 7
## # Groups:   cluster [7]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster     gene      
##        <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>       <chr>     
##  1 0              1.09  0.802 0.573 0         DAneurons   FEV       
##  2 0              0.931 0.928 0.767 0         DAneurons   NTM       
##  3 0              0.750 0.85  0.605 0         DAneurons   LUZP2     
##  4 1.13e-105      0.266 0.33  0.813 2.26e-102 other       MTHFD2    
##  5 2.74e- 85      0.364 0.42  0.786 5.47e- 82 other       PCBD1     
##  6 4.61e-297      0.377 0.535 0.245 9.23e-294 neurons     AC022075.1
##  7 1.03e-273      0.752 0.875 0.707 2.06e-270 neurons     CNR1      
##  8 1.14e-260      0.633 0.643 0.284 2.27e-257 progenitors COL2A1    
##  9 4.05e-224      1.45  0.677 0.378 8.11e-221 progenitors SULF1     
## 10 1.15e-189      0.834 0.815 0.387 2.30e-186 ependymal   NKX2-2    
## 11 7.85e-149      0.863 0.991 0.779 1.57e-145 ependymal   POU2F2    
## 12 2.74e-222      2.55  0.921 0.427 5.48e-219 Glia        HOXA5     
## 13 1.34e-207      1.79  0.986 0.609 2.68e-204 Glia        LINC02381 
## 14 1.25e-221      0.633 0.93  0.27  2.50e-218 neuroblasts VSX2      
## 15 4.07e-208      1.71  0.917 0.274 8.13e-205 neuroblasts MAB21L1

See the plot of genes:

mypar(3, 3, mar = c(5, 5, 3, 3))
for (i in unique(top_sel$cluster)) {
    barplot(sort(setNames(top_sel$avg_log2FC, top_sel$gene)[top_sel$cluster == i], F),
        horiz = T, las = 1, main = paste0(i, " vs. rest"),)
    abline(v = c(0, 0.25))
}

Visualize via heatmap

seu.int_sel <- ScaleData(seu.int, features = as.character(unique(top_sel$gene)), assay = "RNA")
## Centering and scaling data matrix
DoHeatmap(seu.int_sel, features = as.character(unique(top_sel$gene)), group.by = "cell.types.pool",    assay = "RNA")

Visualize the overall expression via dot plot.

 DotPlot(seu.int_sel, features = rev(as.character(unique(top_sel$gene))), group.by = "cell.types.pool",
    assay = "RNA") + coord_flip()

Visulaize via violin plot

VlnPlot(seu.int_sel, features = as.character(unique(top_sel$gene)[1:5]), ncol = 5, group.by = "cell.types.pool", assay = "RNA")

Working with DGE Contrast

Download the contrasts relust from step 7, ### Volcano plot

suppressPackageStartupMessages({
library(EnhancedVolcano)
})
cont_a53t<-read.csv(paste0(WDP,'/cont_main/design1.csv'))
EnhancedVolcano(cont_a53t,lab = rownames(cont_a53t),x = 'logFC',  y = 'P.Value', FCcutoff=1, title = 'A53T - rest', drawConnectors = TRUE, max.overlaps= 30)
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-
## zero p-value...

ggsave(file = "EnhancedVolcano2.pdf")
## Saving 7 x 5 in image
suppressPackageStartupMessages({
library(htmlwidgets)
library(plotly)
})
cont_a53t_sig<-cont_a53t[cont_a53t$adj.P.Val<0.05,]
p <- plot_ly(data = cont_a53t_sig, x = cont_a53t_sig$logFC, y = -log(cont_a53t_sig$adj.P.Val), text = cont_a53t_sig$X, mode = "markers") %>% layout(title ="A53T vs rest")
p
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
#saveWidget(p, "A53T_vs_rest.html")

Spliting

If you want to split the contrast into up and down, you can run the following codes

file_path=paste0(WDP,'/cont_inte')
dest_path=paste0(WDP,'/cont_split')
scrnaboxR::split_up_down(file_path,dest_path,cut_pvalue=0.05,cut_log_low=-0.25,cut_log_up=0.25, n_row=200)

Distance between contrasts

file0<-extract_file(dest_path)
con<-file0[[2]]
#png(file = paste0(WDP,"/heatmap.png"), bg = "transparent")
image(1:dim, 1:dim, dst, axes = FALSE, xlab="", ylab="")
label0<-file0[[1]]
axis(1, 1:dim,label0 , cex.axis = 0.5, las=3)
axis(2, 1:dim, label0, cex.axis = 0.5, las=1)
text(expand.grid(1:dim, 1:dim), sprintf("%0.1f", dst), cex=0.6)

#dev.off()

Dit plot

gwas_genes<-read.csv(file=paste0(WDP,'/all_PD_gwas_loci_genes_filtered.csv'))$hgnc_symbol
file_path=paste0(WDP,'/cont_split')
des_path=paste0(WDP,'/Output')
dittplot(gwas_genes,file_path,des_path)
## Saving 7 x 5 in image
## Saving 7 x 5 in image

Gene Set Enrichment Analysis (GSEA)

genes_rank <- setNames(mk_genes$avg_log2FC, casefold(rownames(mk_genes),upper = T))
head(genes_rank)
##       FEV       NTM     LUZP2    ZNF503    PLXDC2     GRIA4 
## 1.0922771 0.9308030 0.7504550 0.7777264 0.6357738 0.6472063
msigdbgmt <- msigdbr::msigdbr("Homo sapiens")
msigdbgmt <- as.data.frame(msigdbgmt)

You check the list of available gene sets

unique(msigdbgmt$gs_subcat)
##  [1] "MIR:MIR_Legacy"  "TFT:TFT_Legacy"  "CGP"             "TFT:GTRD"       
##  [5] ""                "VAX"             "CP:BIOCARTA"     "CGN"            
##  [9] "GO:BP"           "GO:CC"           "IMMUNESIGDB"     "GO:MF"          
## [13] "HPO"             "CP:KEGG"         "MIR:MIRDB"       "CM"             
## [17] "CP"              "CP:PID"          "CP:REACTOME"     "CP:WIKIPATHWAYS"

Select the subset that you need.

msigdbgmt_subset <- msigdbgmt[msigdbgmt$gs_subcat == "GO:BP", ]
gmt <- lapply(unique(msigdbgmt_subset$gs_name), function(x) {
    msigdbgmt_subset[msigdbgmt_subset$gs_name == x, "gene_symbol"]
})
names(gmt) <- unique(paste0(msigdbgmt_subset$gs_name, "_", msigdbgmt_subset$gs_exact_source))
gmt %>% head() %>% lapply(head)
## $`GOBP_10_FORMYLTETRAHYDROFOLATE_METABOLIC_PROCESS_GO:0009256`
## [1] "AASDHPPT" "ALDH1L1"  "ALDH1L2"  "MTHFD1"   "MTHFD1L"  "MTHFD2L" 
## 
## $`GOBP_2_OXOGLUTARATE_METABOLIC_PROCESS_GO:0006103`
## [1] "AADAT"  "ADHFE1" "D2HGDH" "DLST"   "GOT1"   "GOT2"  
## 
## $`GOBP_2FE_2S_CLUSTER_ASSEMBLY_GO:0044571`
## [1] "BOLA2"  "BOLA2B" "GLRX3"  "GLRX5"  "HSCB"   "NFS1"  
## 
## $`GOBP_3_PHOSPHOADENOSINE_5_PHOSPHOSULFATE_BIOSYNTHETIC_PROCESS_GO:0050428`
## [1] "PAPSS1"  "PAPSS2"  "SLC26A1" "SLC26A2" "SLC35B2" "SLC35B3"
## 
## $`GOBP_3_PHOSPHOADENOSINE_5_PHOSPHOSULFATE_METABOLIC_PROCESS_GO:0050427`
## [1] "ABHD14B" "BPNT1"   "ENPP1"   "PAPSS1"  "PAPSS2"  "SLC26A1"
## 
## $`GOBP_3_UTR_MEDIATED_MRNA_DESTABILIZATION_GO:0061158`
## [1] "CPEB3"  "DHX36"  "DHX36"  "DND1"   "DND1"   "HNRNPD"

conduct enrichemnt analysis and order according the ES

fgseaRes <- fgsea(pathways = gmt, stats = genes_rank, minSize = 15, maxSize = 500, scoreType = "pos")
fgseaRes <- fgseaRes[order(fgseaRes$ES, decreasing = T), ]
barplot(sort(genes_rank, decreasing = T))

head(fgseaRes[order( -abs(NES)), ], n=10)
##                                                      pathway         pval
##  1:                          GOBP_PROTEIN_FOLDING_GO:0006457 1.043428e-07
##  2:                   GOBP_GLIAL_CELL_DEVELOPMENT_GO:0021782 1.555491e-03
##  3:     GOBP_MULTI_MULTICELLULAR_ORGANISM_PROCESS_GO:0044706 1.776387e-03
##  4:             GOBP_PEPTIDE_BIOSYNTHETIC_PROCESS_GO:0043043 1.066019e-04
##  5:                  GOBP_CYTOPLASMIC_TRANSLATION_GO:0002181 2.941356e-03
##  6:                    GOBP_RESPONSE_TO_METAL_ION_GO:0010038 3.089347e-03
##  7:               GOBP_AMIDE_BIOSYNTHETIC_PROCESS_GO:0043604 5.266381e-04
##  8: GOBP_RESPONSE_TO_ENDOPLASMIC_RETICULUM_STRESS_GO:0034976 2.127409e-03
##  9:    GOBP_PROTEASOMAL_PROTEIN_CATABOLIC_PROCESS_GO:0010498 8.892796e-03
## 10: GOBP_CELLULAR_RESPONSE_TO_INORGANIC_SUBSTANCE_GO:0071241 9.341162e-03
##             padj   log2err        ES      NES size
##  1: 6.271004e-05 0.7049757 0.7576315 2.446605   20
##  2: 1.334511e-01 0.4550599 0.6283919 1.946792   15
##  3: 1.334511e-01 0.4550599 0.6017259 1.930405   19
##  4: 3.203386e-02 0.5384341 0.5549910 1.919229   36
##  5: 1.428229e-01 0.4317077 0.6095427 1.888396   15
##  6: 1.428229e-01 0.4317077 0.5672391 1.852131   22
##  7: 1.051354e-01 0.4772708 0.5270206 1.846684   40
##  8: 1.345279e-01 0.4317077 0.5386361 1.815753   29
##  9: 2.545033e-01 0.3807304 0.5585477 1.791884   19
## 10: 2.551836e-01 0.3807304 0.5626956 1.789993   18
##                                  leadingEdge
##  1:  PDIA6,HSP90B1,HSPA5,CALR,PPIB,PDIA3,...
##  2:         VIM,MDK,SHH,NKX6-2,CLU,PRDM8,...
##  3:           IGFBP5,IGFBP2,CALR,TIMP1,NR2F2
##  4:    IGFBP5,VIM,CALR,DNAJC3,RPL12,ENC1,...
##  5: RPL12,RPS12,RPS20,RPS11,RPS27,RPL27A,...
##  6: IGFBP2,HSPA5,CALR,SERPINF1,SHH,CPNE8,...
##  7:    IGFBP5,VIM,CALR,DNAJC3,RPL12,ENC1,...
##  8:  HSP90B1,HSPA5,CALR,PDIA3,PDIA4,MANF,...
##  9:    HSP90B1,HSPA5,CALR,SDF2L1,SHH,CLU,...
## 10:      HSPA5,CALR,SERPINF1,SHH,CPNE8,SYT11

get enrichment score plot for a specific pathway

plotEnrichment(gmt[["GOBP_ACTIN_FILAMENT_BASED_PROCESS_GO:0030029"]], genes_rank)

You can plot enrichment for multiple pathways.

top_up <- fgseaRes %>% filter(ES > 0) %>% top_n(10, wt=-padj)
top_down <- fgseaRes %>% filter(ES < 0) %>% top_n(10, wt=-padj)
top_pathways <- bind_rows(top_up, top_down) %>% arrange(-ES)
plotGseaTable(gmt[top_pathways$pathway], genes_rank, fgseaRes, gseaParam = 0.5)

The following barplot can be used to compare Hallmark pathways (obtained from https://www.biostars.org/p/467197/)

fgseaResTidy <- fgseaRes %>% as_tibble() %>% arrange(desc(NES))
fgseaResTidy %>% dplyr::select(-leadingEdge, -ES) %>% arrange(padj) %>% DT::datatable()
#ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) + geom_col(aes(fill=padj<0.05)) +coord_flip() +labs(x="Pathway", y="Normalized Enrichment Score",title="Hallmark pathways NES from GSEA") + theme_minimal()

Gene Ontology gene sets (KEGG gene sets)

setwd(paste0(WDP,'/Output'))
file0='design1down'
sel_genes <- read.csv(paste0(WDP,'/cont_split/',file0,'.csv',sep=""))[,1]
sel_genes_go <- clusterProfiler::enrichGO(sel_genes,"org.Hs.eg.db",keyType = "SYMBOL",ont = "BP",minGSSize = 50)
write.csv(sel_genes_go@result,file = paste(file0,'enrichGO','.csv',sep=''))
enr_go <- clusterProfiler::simplify(sel_genes_go)
enrichplot::emapplot(enrichplot::pairwise_termsim(enr_go),showCategory = 30, cex_label_category = 0.5)

ggsave(file = "enrich1.pdf")
## Saving 7 x 5 in image
dotplot(enrichplot::pairwise_termsim(enr_go))

ggsave(file = "dot1.pdf")
## Saving 7 x 5 in image
cnetplot(enrichplot::pairwise_termsim(enr_go)) 
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ggsave(file = "cnet1.pdf")
## Saving 7 x 5 in image
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
gmt <- msigdbr::msigdbr(species = "human", category = "H")
sel_go0 <- clusterProfiler::enricher(gene = sel_genes,,pAdjustMethod = "BH",pvalueCutoff  = 0.05, qvalueCutoff  = 0.05, TERM2GENE = gmt[,c("gs_name", "gene_symbol")])
write.csv(sel_go0@result,file = paste(file0,'enricher','.csv',sep=''))

The follwoing run the KEGG pathway enrichment analysis.

sel_genes_id<-mapIds(org.Hs.eg.db, sel_genes, 'ENTREZID', 'SYMBOL',multiVals=last)
## 'select()' returned 1:1 mapping between keys and columns
get_ENTREZID <- function(x){
  ab<-NULL
  for (i in 1: length(x)){
    ab[i]<-x[[i]]
  }
  message("There are ", sum(is.na(ab)), " NA and they are dropped")
  ret<-ab
  }
genes_id<-get_ENTREZID(sel_genes_id)
## There are 8 NA and they are dropped
enrich_kegg <- enrichKEGG(gene = genes_id)
## Reading KEGG annotation online:
## Reading KEGG annotation online:
head(enrich_kegg, n=10)
##                ID                                       Description GeneRatio
## hsa03010 hsa03010                                          Ribosome    69/146
## hsa05171 hsa05171                    Coronavirus disease - COVID-19    69/146
## hsa05012 hsa05012                                 Parkinson disease    28/146
## hsa05010 hsa05010                                 Alzheimer disease    24/146
## hsa05022 hsa05022 Pathways of neurodegeneration - multiple diseases    27/146
## hsa05016 hsa05016                                Huntington disease    21/146
## hsa00190 hsa00190                         Oxidative phosphorylation    14/146
## hsa05020 hsa05020                                     Prion disease    19/146
## hsa05415 hsa05415                           Diabetic cardiomyopathy    15/146
## hsa05014 hsa05014                     Amyotrophic lateral sclerosis    20/146
##           BgRatio       pvalue     p.adjust       qvalue
## hsa03010 158/8163 3.685369e-84 5.712322e-82 4.926756e-82
## hsa05171 232/8163 3.355049e-70 2.600163e-68 2.242585e-68
## hsa05012 266/8163 1.531524e-14 7.912872e-13 6.824684e-13
## hsa05010 384/8163 6.521277e-08 2.061174e-06 1.777719e-06
## hsa05022 476/8163 6.648949e-08 2.061174e-06 1.777719e-06
## hsa05016 306/8163 9.896843e-08 2.205557e-06 1.902246e-06
## hsa00190 134/8163 9.960579e-08 2.205557e-06 1.902246e-06
## hsa05020 273/8163 3.440513e-07 6.665994e-06 5.749278e-06
## hsa05415 203/8163 3.090954e-06 5.323310e-05 4.591242e-05
## hsa05014 364/8163 6.690134e-06 1.036971e-04 8.943653e-05
##                                                                                                                                                                                                                                                                                                                                                               geneID
## hsa03010 6128/6229/6141/6134/6202/6164/6233/6138/6171/6189/6176/6209/6228/6159/6133/6135/6132/3921/6158/6191/6156/6143/6137/6161/6154/6187/6223/6210/6122/6142/23521/6144/6217/6207/6188/6168/6125/6208/6206/6231/6169/6230/9349/6152/6147/6203/2197/6201/11224/9045/6222/6165/6175/6130/6167/6193/6129/25873/6234/7311/4736/6232/6235/6181/6160/6146/6205/6155/6170
## hsa05171 6128/6229/6141/6134/6202/6164/6233/6138/6171/6189/6176/6209/6228/6159/6133/6135/6132/3921/6158/6191/6156/6143/6137/6161/6154/6187/6223/6210/6122/6142/23521/6144/6217/6207/6188/6168/6125/6208/6206/6231/6169/6230/9349/6152/6147/6203/2197/6201/11224/9045/6222/6165/6175/6130/6167/6193/6129/25873/6234/7311/4736/6232/6235/6181/6160/6146/6205/6155/6170
## hsa05012                                                                                                                                                                                                                6233/7345/1350/805/7316/347733/7417/7846/801/9377/4697/29796/517/7314/5688/518/7416/7388/7280/6622/1337/7311/4725/4729/1347/4718/203068/7295
## hsa05010                                                                                                                                                                                                                                   1350/2597/805/347733/7417/7846/801/9377/4697/29796/517/5688/518/7416/7388/7280/6622/1337/10313/4725/4729/1347/4718/203068
## hsa05022                                                                                                                                                                                                                     6233/7345/1350/805/7316/347733/7417/7846/801/9377/4697/29796/517/7314/5688/518/7416/7388/7280/6622/1337/7311/4725/4729/1347/4718/203068
## hsa05016                                                                                                                                                                                                                                                 1350/347733/7417/7846/9377/4697/29796/517/5688/518/7416/7388/7280/1337/4725/1211/4729/1347/4718/1173/203068
## hsa00190                                                                                                                                                                                                                                                                                       1350/9550/9377/4697/29796/517/518/7388/1337/4725/4729/1347/4718/10632
## hsa05020                                                                                                                                                                                                                                                           1350/347733/7417/7846/9377/4697/29796/517/5688/518/7416/7388/7280/1337/4725/4729/1347/4718/203068
## hsa05415                                                                                                                                                                                                                                                                                   1350/2597/7417/9377/4697/29796/517/518/7416/7388/1337/4725/4729/1347/4718
## hsa05014                                                                                                                                                                                                                                                          71/1350/60/347733/7846/9377/4697/29796/517/5688/518/7416/7388/7280/1337/4725/4729/1347/4718/203068
##          Count
## hsa03010    69
## hsa05171    69
## hsa05012    28
## hsa05010    24
## hsa05022    27
## hsa05016    21
## hsa00190    14
## hsa05020    19
## hsa05415    15
## hsa05014    20