-
Notifications
You must be signed in to change notification settings - Fork 0
/
AveBeta_Hmm.Rmd
154 lines (108 loc) · 3.95 KB
/
AveBeta_Hmm.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
---
title: "AveBeta_Hmm"
output:
html_document:
df_print: paged
---
# {.tabset}
```{r setup, echo=FALSE, warning=FALSE}
library(tidyverse)
library(dplyr)
library(slider)
library(ggplot2)
library(readxl)
library(liftOver)
library(annotatr)
library(GenomicRanges)
library(GenomicFeatures)
library(data.table)
library(xlsx)
library(rtracklayer)
require(readr)
library(ggridges)
library(viridis)
library(ggstatsplot)
#library("ggpubr")
source("H3K27func.R")
```
```{r, echo=FALSE, warning=FALSE, eval=FALSE, eval=FALSE}
datalist <- readRDS("../Hmm/Hmm_data.Rds")
wkhmm <- bind_rows(datalist) %>% filter_at(vars(starts_with("n_sites_")),all_vars(!is.na(.))) %>% rowwise() %>%
mutate(n_sites=max(c_across(starts_with("n_sites_")), na.rm = TRUE)) %>%
ungroup
write_csv(wkhmm, "../AveBeta/selectedbins_Hmm.csv")
```
```{r test, echo=FALSE, warning=FALSE}
titlelist <- list("N1N2", "JAJB", "IAIB", "DADB", "HAHB", "MAMB", "ENJN", "ENIN", "EAEB", "FAFB", "KAKB", "PAPB", "SASB", "XAXB")
wktmp <- read_csv("../AveBeta/selectedbins_Hmm.csv")
head(wktmp)
longdata <- list()
for(i in 1:length(titlelist)){
longtmp <- wktmp %>%
mutate(ave_beta = rowMeans(wktmp[,c(sprintf("ave_beta1_%s", titlelist[[i]]), sprintf("ave_beta2_%s", titlelist[[i]]))], na.rm=TRUE)) %>%
dplyr::select(seqnames, start, end, regid, sprintf("ave_pwd_%s", titlelist[[i]]), ave_beta, Names) %>%
mutate(whichpair=titlelist[[i]])
colnames(longtmp)[5] <- c("ave_pwd")
longdata[[i]] <- longtmp
}
longdataf <- bind_rows(longdata) %>% distinct()
write_csv(longdataf, "../AveBeta/Hmm_long_unique.csv")
#longdataf <- read_csv("../AveBeta/Hmm_long_unique.csv")
summary(longdataf)
mode(longdataf$Names)
for(i in 1:15){
plot <- longdataf %>% filter(Names==i) %>%
ggplot(aes(x = ave_beta, y = ave_pwd)) +
geom_point(aes(color = whichpair)) +
ylim(0, 1) +
scale_x_continuous(breaks = seq(0, 1, by = 0.5)) +
facet_wrap(whichpair ~ ., ncol=6) +
#facet_wrap(facets = ~reorder(seqnames, rank), ncol=6) +
xlab("\n Average beta") +
ylab("Average pwd\n") +
ggtitle(sprintf("Hmm consensus state %s \n", i)) +
guides(color=guide_legend(" "))
print(plot)
}
```
```{r mkplotpre, echo=FALSE, warning=FALSE}
titlelist <- list("N1N2", "JAJB", "IAIB", "DADB", "HAHB", "MAMB", "ENJN", "ENIN", "EAEB", "FAFB", "KAKB", "PAPB", "SASB", "XAXB")
alist <- list("N1", "JA", "IA", "DA", "HA", "MA", "EN", "EN","EA","FA","KA","PA","SA","XA")
blist <- list("N2", "JB", "IB", "DB", "HB", "MB", "JN", "IN","EB","FB","KB","PB","SB","XB")
selectbins <- read_csv("../AveBeta/selectedbins_Hmm.csv")
table(selectbins$Names)
```
```{r mkplot, echo=FALSE, warning=FALSE}
for (i in 1:length(titlelist)){
ck <- selectbins %>% dplyr::select(seqnames, start, end, Names, sprintf("ave_pwd_%s", titlelist[[i]]), sprintf("ave_beta1_%s", titlelist[[i]]), sprintf("ave_beta2_%s", titlelist[[i]]))
colnames(ck)[5:7] <- c("ave_pwd", "ave_beta1", "ave_beta2")
plot1 <- ck %>% mutate(rank=as.numeric(Names)) %>%
ggplot(aes(x = ave_beta1, y = ave_pwd)) +
geom_point() +
geom_smooth() +
ylim(0, 1) +
xlim(0, 1) +
#facet_wrap(Names ~ ., ncol=5) +
facet_wrap(facets = ~reorder(Names, rank), ncol=5) +
ylab(sprintf("Average pwd %s", titlelist[[i]])) +
xlab(sprintf("Average methylation rate in %s", alist[[i]])) +
guides(color=guide_legend(" "))
plot2 <- ck %>% mutate(rank=as.numeric(Names)) %>%
ggplot(aes(x = ave_beta2, y = ave_pwd)) +
geom_point() +
geom_smooth() +
ylim(0, 1) +
xlim(0, 1) +
#facet_wrap(Names ~ ., ncol=5) +
facet_wrap(facets = ~reorder(Names, rank), ncol=5) +
ylab(sprintf("Average pwd %s", titlelist[[i]])) +
xlab(sprintf("Average methylation rate in %s", blist[[i]])) +
guides(color=guide_legend(" "))
print(combine_plots(
plotlist = list(plot1, plot2),
plotgrid.args = list(ncol = 2, nrow = 1),
annotation.args = list(
tag_levels = "a",
title = sprintf("Hmm region, %s", titlelist[[i]]))))
}
```