content
- Cell marker genes
- Working with DGE Contrast
- Volcano plot
- split contrast
- Calculate distance between contrast
- ditt plot
- Gene Ontology gene sets (KEGG gene sets)
- Gene Set Enrichment Analysis (GSEA)
Cell marker genes
Download /gdp
from step7 on your working path
WDP
, cell marker genes and seurat object are the
de_genes
and seu.int
, respectively
WDP='/Volumes/F/for_my_website/scrnabox0/related_to_scrnaR'
setwd(WDP)
mk_genes<-readRDS(paste0(WDP, "/gdp/de_genes.rds"))
seu.int<-readRDS(paste0(WDP, "/gdp/seu.int.c.rds"))
Select top 2
mk_genes %>%
group_by(cluster) %>%
top_n(-2, p_val_adj) -> top_sel
top_sel
## # A tibble: 15 × 7
## # Groups: cluster [7]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0 1.09 0.802 0.573 0 DAneurons FEV
## 2 0 0.931 0.928 0.767 0 DAneurons NTM
## 3 0 0.750 0.85 0.605 0 DAneurons LUZP2
## 4 1.13e-105 0.266 0.33 0.813 2.26e-102 other MTHFD2
## 5 2.74e- 85 0.364 0.42 0.786 5.47e- 82 other PCBD1
## 6 4.61e-297 0.377 0.535 0.245 9.23e-294 neurons AC022075.1
## 7 1.03e-273 0.752 0.875 0.707 2.06e-270 neurons CNR1
## 8 1.14e-260 0.633 0.643 0.284 2.27e-257 progenitors COL2A1
## 9 4.05e-224 1.45 0.677 0.378 8.11e-221 progenitors SULF1
## 10 1.15e-189 0.834 0.815 0.387 2.30e-186 ependymal NKX2-2
## 11 7.85e-149 0.863 0.991 0.779 1.57e-145 ependymal POU2F2
## 12 2.74e-222 2.55 0.921 0.427 5.48e-219 Glia HOXA5
## 13 1.34e-207 1.79 0.986 0.609 2.68e-204 Glia LINC02381
## 14 1.25e-221 0.633 0.93 0.27 2.50e-218 neuroblasts VSX2
## 15 4.07e-208 1.71 0.917 0.274 8.13e-205 neuroblasts MAB21L1
See the plot of genes:
mypar(3, 3, mar = c(5, 5, 3, 3))
for (i in unique(top_sel$cluster)) {
barplot(sort(setNames(top_sel$avg_log2FC, top_sel$gene)[top_sel$cluster == i], F),
horiz = T, las = 1, main = paste0(i, " vs. rest"),)
abline(v = c(0, 0.25))
}
Visualize via heatmap
seu.int_sel <- ScaleData(seu.int, features = as.character(unique(top_sel$gene)), assay = "RNA")
## Centering and scaling data matrix
DoHeatmap(seu.int_sel, features = as.character(unique(top_sel$gene)), group.by = "cell.types.pool", assay = "RNA")
Visualize the overall expression via dot plot.
DotPlot(seu.int_sel, features = rev(as.character(unique(top_sel$gene))), group.by = "cell.types.pool",
assay = "RNA") + coord_flip()
Visulaize via violin plot
VlnPlot(seu.int_sel, features = as.character(unique(top_sel$gene)[1:5]), ncol = 5, group.by = "cell.types.pool", assay = "RNA")
Working with DGE Contrast
Download the contrasts relust from step 7, ### Volcano plot
suppressPackageStartupMessages({
library(EnhancedVolcano)
})
cont_a53t<-read.csv(paste0(WDP,'/cont_main/design1.csv'))
EnhancedVolcano(cont_a53t,lab = rownames(cont_a53t),x = 'logFC', y = 'P.Value', FCcutoff=1, title = 'A53T - rest', drawConnectors = TRUE, max.overlaps= 30)
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-
## zero p-value...
ggsave(file = "EnhancedVolcano2.pdf")
## Saving 7 x 5 in image
suppressPackageStartupMessages({
library(htmlwidgets)
library(plotly)
})
cont_a53t_sig<-cont_a53t[cont_a53t$adj.P.Val<0.05,]
p <- plot_ly(data = cont_a53t_sig, x = cont_a53t_sig$logFC, y = -log(cont_a53t_sig$adj.P.Val), text = cont_a53t_sig$X, mode = "markers") %>% layout(title ="A53T vs rest")
p
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
#saveWidget(p, "A53T_vs_rest.html")
Spliting
If you want to split the contrast into up and down, you can run the following codes
file_path=paste0(WDP,'/cont_inte')
dest_path=paste0(WDP,'/cont_split')
scrnaboxR::split_up_down(file_path,dest_path,cut_pvalue=0.05,cut_log_low=-0.25,cut_log_up=0.25, n_row=200)
Distance between contrasts
file0<-extract_file(dest_path)
con<-file0[[2]]
#png(file = paste0(WDP,"/heatmap.png"), bg = "transparent")
image(1:dim, 1:dim, dst, axes = FALSE, xlab="", ylab="")
label0<-file0[[1]]
axis(1, 1:dim,label0 , cex.axis = 0.5, las=3)
axis(2, 1:dim, label0, cex.axis = 0.5, las=1)
text(expand.grid(1:dim, 1:dim), sprintf("%0.1f", dst), cex=0.6)
#dev.off()
Dit plot
gwas_genes<-read.csv(file=paste0(WDP,'/all_PD_gwas_loci_genes_filtered.csv'))$hgnc_symbol
file_path=paste0(WDP,'/cont_split')
des_path=paste0(WDP,'/Output')
dittplot(gwas_genes,file_path,des_path)
## Saving 7 x 5 in image
## Saving 7 x 5 in image
Gene Set Enrichment Analysis (GSEA)
genes_rank <- setNames(mk_genes$avg_log2FC, casefold(rownames(mk_genes),upper = T))
head(genes_rank)
## FEV NTM LUZP2 ZNF503 PLXDC2 GRIA4
## 1.0922771 0.9308030 0.7504550 0.7777264 0.6357738 0.6472063
msigdbgmt <- msigdbr::msigdbr("Homo sapiens")
msigdbgmt <- as.data.frame(msigdbgmt)
You check the list of available gene sets
unique(msigdbgmt$gs_subcat)
## [1] "MIR:MIR_Legacy" "TFT:TFT_Legacy" "CGP" "TFT:GTRD"
## [5] "" "VAX" "CP:BIOCARTA" "CGN"
## [9] "GO:BP" "GO:CC" "IMMUNESIGDB" "GO:MF"
## [13] "HPO" "CP:KEGG" "MIR:MIRDB" "CM"
## [17] "CP" "CP:PID" "CP:REACTOME" "CP:WIKIPATHWAYS"
Select the subset that you need.
msigdbgmt_subset <- msigdbgmt[msigdbgmt$gs_subcat == "GO:BP", ]
gmt <- lapply(unique(msigdbgmt_subset$gs_name), function(x) {
msigdbgmt_subset[msigdbgmt_subset$gs_name == x, "gene_symbol"]
})
names(gmt) <- unique(paste0(msigdbgmt_subset$gs_name, "_", msigdbgmt_subset$gs_exact_source))
gmt %>% head() %>% lapply(head)
## $`GOBP_10_FORMYLTETRAHYDROFOLATE_METABOLIC_PROCESS_GO:0009256`
## [1] "AASDHPPT" "ALDH1L1" "ALDH1L2" "MTHFD1" "MTHFD1L" "MTHFD2L"
##
## $`GOBP_2_OXOGLUTARATE_METABOLIC_PROCESS_GO:0006103`
## [1] "AADAT" "ADHFE1" "D2HGDH" "DLST" "GOT1" "GOT2"
##
## $`GOBP_2FE_2S_CLUSTER_ASSEMBLY_GO:0044571`
## [1] "BOLA2" "BOLA2B" "GLRX3" "GLRX5" "HSCB" "NFS1"
##
## $`GOBP_3_PHOSPHOADENOSINE_5_PHOSPHOSULFATE_BIOSYNTHETIC_PROCESS_GO:0050428`
## [1] "PAPSS1" "PAPSS2" "SLC26A1" "SLC26A2" "SLC35B2" "SLC35B3"
##
## $`GOBP_3_PHOSPHOADENOSINE_5_PHOSPHOSULFATE_METABOLIC_PROCESS_GO:0050427`
## [1] "ABHD14B" "BPNT1" "ENPP1" "PAPSS1" "PAPSS2" "SLC26A1"
##
## $`GOBP_3_UTR_MEDIATED_MRNA_DESTABILIZATION_GO:0061158`
## [1] "CPEB3" "DHX36" "DHX36" "DND1" "DND1" "HNRNPD"
conduct enrichemnt analysis and order according the ES
fgseaRes <- fgsea(pathways = gmt, stats = genes_rank, minSize = 15, maxSize = 500, scoreType = "pos")
fgseaRes <- fgseaRes[order(fgseaRes$ES, decreasing = T), ]
barplot(sort(genes_rank, decreasing = T))
head(fgseaRes[order( -abs(NES)), ], n=10)
## pathway pval
## 1: GOBP_PROTEIN_FOLDING_GO:0006457 1.043428e-07
## 2: GOBP_GLIAL_CELL_DEVELOPMENT_GO:0021782 1.555491e-03
## 3: GOBP_MULTI_MULTICELLULAR_ORGANISM_PROCESS_GO:0044706 1.776387e-03
## 4: GOBP_PEPTIDE_BIOSYNTHETIC_PROCESS_GO:0043043 1.066019e-04
## 5: GOBP_CYTOPLASMIC_TRANSLATION_GO:0002181 2.941356e-03
## 6: GOBP_RESPONSE_TO_METAL_ION_GO:0010038 3.089347e-03
## 7: GOBP_AMIDE_BIOSYNTHETIC_PROCESS_GO:0043604 5.266381e-04
## 8: GOBP_RESPONSE_TO_ENDOPLASMIC_RETICULUM_STRESS_GO:0034976 2.127409e-03
## 9: GOBP_PROTEASOMAL_PROTEIN_CATABOLIC_PROCESS_GO:0010498 8.892796e-03
## 10: GOBP_CELLULAR_RESPONSE_TO_INORGANIC_SUBSTANCE_GO:0071241 9.341162e-03
## padj log2err ES NES size
## 1: 6.271004e-05 0.7049757 0.7576315 2.446605 20
## 2: 1.334511e-01 0.4550599 0.6283919 1.946792 15
## 3: 1.334511e-01 0.4550599 0.6017259 1.930405 19
## 4: 3.203386e-02 0.5384341 0.5549910 1.919229 36
## 5: 1.428229e-01 0.4317077 0.6095427 1.888396 15
## 6: 1.428229e-01 0.4317077 0.5672391 1.852131 22
## 7: 1.051354e-01 0.4772708 0.5270206 1.846684 40
## 8: 1.345279e-01 0.4317077 0.5386361 1.815753 29
## 9: 2.545033e-01 0.3807304 0.5585477 1.791884 19
## 10: 2.551836e-01 0.3807304 0.5626956 1.789993 18
## leadingEdge
## 1: PDIA6,HSP90B1,HSPA5,CALR,PPIB,PDIA3,...
## 2: VIM,MDK,SHH,NKX6-2,CLU,PRDM8,...
## 3: IGFBP5,IGFBP2,CALR,TIMP1,NR2F2
## 4: IGFBP5,VIM,CALR,DNAJC3,RPL12,ENC1,...
## 5: RPL12,RPS12,RPS20,RPS11,RPS27,RPL27A,...
## 6: IGFBP2,HSPA5,CALR,SERPINF1,SHH,CPNE8,...
## 7: IGFBP5,VIM,CALR,DNAJC3,RPL12,ENC1,...
## 8: HSP90B1,HSPA5,CALR,PDIA3,PDIA4,MANF,...
## 9: HSP90B1,HSPA5,CALR,SDF2L1,SHH,CLU,...
## 10: HSPA5,CALR,SERPINF1,SHH,CPNE8,SYT11
get enrichment score plot for a specific pathway
plotEnrichment(gmt[["GOBP_ACTIN_FILAMENT_BASED_PROCESS_GO:0030029"]], genes_rank)
You can plot enrichment for multiple pathways.
top_up <- fgseaRes %>% filter(ES > 0) %>% top_n(10, wt=-padj)
top_down <- fgseaRes %>% filter(ES < 0) %>% top_n(10, wt=-padj)
top_pathways <- bind_rows(top_up, top_down) %>% arrange(-ES)
plotGseaTable(gmt[top_pathways$pathway], genes_rank, fgseaRes, gseaParam = 0.5)
The following barplot can be used to compare Hallmark pathways (obtained from https://www.biostars.org/p/467197/)
fgseaResTidy <- fgseaRes %>% as_tibble() %>% arrange(desc(NES))
fgseaResTidy %>% dplyr::select(-leadingEdge, -ES) %>% arrange(padj) %>% DT::datatable()
#ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) + geom_col(aes(fill=padj<0.05)) +coord_flip() +labs(x="Pathway", y="Normalized Enrichment Score",title="Hallmark pathways NES from GSEA") + theme_minimal()
Gene Ontology gene sets (KEGG gene sets)
setwd(paste0(WDP,'/Output'))
file0='design1down'
sel_genes <- read.csv(paste0(WDP,'/cont_split/',file0,'.csv',sep=""))[,1]
sel_genes_go <- clusterProfiler::enrichGO(sel_genes,"org.Hs.eg.db",keyType = "SYMBOL",ont = "BP",minGSSize = 50)
write.csv(sel_genes_go@result,file = paste(file0,'enrichGO','.csv',sep=''))
enr_go <- clusterProfiler::simplify(sel_genes_go)
enrichplot::emapplot(enrichplot::pairwise_termsim(enr_go),showCategory = 30, cex_label_category = 0.5)
ggsave(file = "enrich1.pdf")
## Saving 7 x 5 in image
dotplot(enrichplot::pairwise_termsim(enr_go))
ggsave(file = "dot1.pdf")
## Saving 7 x 5 in image
cnetplot(enrichplot::pairwise_termsim(enr_go))
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(file = "cnet1.pdf")
## Saving 7 x 5 in image
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
gmt <- msigdbr::msigdbr(species = "human", category = "H")
sel_go0 <- clusterProfiler::enricher(gene = sel_genes,,pAdjustMethod = "BH",pvalueCutoff = 0.05, qvalueCutoff = 0.05, TERM2GENE = gmt[,c("gs_name", "gene_symbol")])
write.csv(sel_go0@result,file = paste(file0,'enricher','.csv',sep=''))
The follwoing run the KEGG pathway enrichment analysis.
sel_genes_id<-mapIds(org.Hs.eg.db, sel_genes, 'ENTREZID', 'SYMBOL',multiVals=last)
## 'select()' returned 1:1 mapping between keys and columns
get_ENTREZID <- function(x){
ab<-NULL
for (i in 1: length(x)){
ab[i]<-x[[i]]
}
message("There are ", sum(is.na(ab)), " NA and they are dropped")
ret<-ab
}
genes_id<-get_ENTREZID(sel_genes_id)
## There are 8 NA and they are dropped
enrich_kegg <- enrichKEGG(gene = genes_id)
## Reading KEGG annotation online:
## Reading KEGG annotation online:
head(enrich_kegg, n=10)
## ID Description GeneRatio
## hsa03010 hsa03010 Ribosome 69/146
## hsa05171 hsa05171 Coronavirus disease - COVID-19 69/146
## hsa05012 hsa05012 Parkinson disease 28/146
## hsa05010 hsa05010 Alzheimer disease 24/146
## hsa05022 hsa05022 Pathways of neurodegeneration - multiple diseases 27/146
## hsa05016 hsa05016 Huntington disease 21/146
## hsa00190 hsa00190 Oxidative phosphorylation 14/146
## hsa05020 hsa05020 Prion disease 19/146
## hsa05415 hsa05415 Diabetic cardiomyopathy 15/146
## hsa05014 hsa05014 Amyotrophic lateral sclerosis 20/146
## BgRatio pvalue p.adjust qvalue
## hsa03010 158/8163 3.685369e-84 5.712322e-82 4.926756e-82
## hsa05171 232/8163 3.355049e-70 2.600163e-68 2.242585e-68
## hsa05012 266/8163 1.531524e-14 7.912872e-13 6.824684e-13
## hsa05010 384/8163 6.521277e-08 2.061174e-06 1.777719e-06
## hsa05022 476/8163 6.648949e-08 2.061174e-06 1.777719e-06
## hsa05016 306/8163 9.896843e-08 2.205557e-06 1.902246e-06
## hsa00190 134/8163 9.960579e-08 2.205557e-06 1.902246e-06
## hsa05020 273/8163 3.440513e-07 6.665994e-06 5.749278e-06
## hsa05415 203/8163 3.090954e-06 5.323310e-05 4.591242e-05
## hsa05014 364/8163 6.690134e-06 1.036971e-04 8.943653e-05
## geneID
## hsa03010 6128/6229/6141/6134/6202/6164/6233/6138/6171/6189/6176/6209/6228/6159/6133/6135/6132/3921/6158/6191/6156/6143/6137/6161/6154/6187/6223/6210/6122/6142/23521/6144/6217/6207/6188/6168/6125/6208/6206/6231/6169/6230/9349/6152/6147/6203/2197/6201/11224/9045/6222/6165/6175/6130/6167/6193/6129/25873/6234/7311/4736/6232/6235/6181/6160/6146/6205/6155/6170
## hsa05171 6128/6229/6141/6134/6202/6164/6233/6138/6171/6189/6176/6209/6228/6159/6133/6135/6132/3921/6158/6191/6156/6143/6137/6161/6154/6187/6223/6210/6122/6142/23521/6144/6217/6207/6188/6168/6125/6208/6206/6231/6169/6230/9349/6152/6147/6203/2197/6201/11224/9045/6222/6165/6175/6130/6167/6193/6129/25873/6234/7311/4736/6232/6235/6181/6160/6146/6205/6155/6170
## hsa05012 6233/7345/1350/805/7316/347733/7417/7846/801/9377/4697/29796/517/7314/5688/518/7416/7388/7280/6622/1337/7311/4725/4729/1347/4718/203068/7295
## hsa05010 1350/2597/805/347733/7417/7846/801/9377/4697/29796/517/5688/518/7416/7388/7280/6622/1337/10313/4725/4729/1347/4718/203068
## hsa05022 6233/7345/1350/805/7316/347733/7417/7846/801/9377/4697/29796/517/7314/5688/518/7416/7388/7280/6622/1337/7311/4725/4729/1347/4718/203068
## hsa05016 1350/347733/7417/7846/9377/4697/29796/517/5688/518/7416/7388/7280/1337/4725/1211/4729/1347/4718/1173/203068
## hsa00190 1350/9550/9377/4697/29796/517/518/7388/1337/4725/4729/1347/4718/10632
## hsa05020 1350/347733/7417/7846/9377/4697/29796/517/5688/518/7416/7388/7280/1337/4725/4729/1347/4718/203068
## hsa05415 1350/2597/7417/9377/4697/29796/517/518/7416/7388/1337/4725/4729/1347/4718
## hsa05014 71/1350/60/347733/7846/9377/4697/29796/517/5688/518/7416/7388/7280/1337/4725/4729/1347/4718/203068
## Count
## hsa03010 69
## hsa05171 69
## hsa05012 28
## hsa05010 24
## hsa05022 27
## hsa05016 21
## hsa00190 14
## hsa05020 19
## hsa05415 15
## hsa05014 20