-
Notifications
You must be signed in to change notification settings - Fork 0
/
Athero.Rmd
150 lines (100 loc) · 4.12 KB
/
Athero.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
---
title: "Athero"
author: "gntem2"
date: "15/01/2020"
output: html_document
---
```{r setup, include=FALSE}
# Default for all later chunks: echo the R source alongside its output.
knitr::opts_chunk$set(echo = TRUE)
```
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```{r data}
library(pubmed.mineR)
library(lsa)
library(SnowballC)
library(wordcloud)
library(DT)
# Text-mining pipeline: read a PubMed export, build a term-document matrix
# from a curated vocabulary plus the gene symbols found in the abstracts,
# plot term frequencies, run latent semantic analysis, and export pairwise
# cosine similarities for the network chunk that follows.

# Curated vocabulary of atherosclerosis-related terms (phrases allowed).
# Fixed: "penumonia" was misspelled and so could never match an abstract.
tdm_wordsA <- c(
  "inflammation", "residual", "atherosclerosis", "plaque", "vulnerable",
  "carotid artery", "coronary artery", "rupture", "antiplatelet", "gliptin",
  "ezetimibe", "colchicine", "evolocumab", "bococizumab", "canakinumab",
  "methotrexate", "salsalate", "gout", "statin", "cholesterol", "IMT",
  "shear stress", "fractional flow reserve", "calcification", "hypertension",
  "diabetes", "obesity", "stroke", "acute coronary syndrome",
  "myocardial infarction", "infection", "pneumonia", "microbiota", "gut brain"
)

# Input: PubMed search "atherosclerosis inflammation", saved in summary
# format on 16/1/20.
abstracts <- readabs("pubmed_result.txt")
pmids <- abstracts@PMID

# Gene symbols mentioned in the abstracts (HGNC approved symbols).
gene <- gene_atomization(abstracts)
# summary(gene)
words <- word_atomizations(abstracts)

abstract.bodies <- abstracts@Abstract
# SentenceToken(abstract.bodies[1])

# searchabsL() is called without filters here, so the corpus passed on to the
# term-document matrix is not narrowed in this script.
Myelin <- searchabsL(abstracts)
Con <- Find_conclusion(abstracts) # conclusion of abstracts

# Gene terms come from the first column of the gene table.
tdm_wordsG <- gene[, 1]

# Combined vocabulary: curated terms followed by gene symbols.
tdm_wordsAG <- c(tdm_wordsA, tdm_wordsG)

# Term-document matrix restricted to the combined vocabulary.
tdmAG <- tdm_for_lsa(Myelin, tdm_wordsAG)

# Term frequencies across all documents, most frequent first.
m <- as.matrix(tdmAG)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
head(d, 10)

# Fixed seed so the word cloud layout is reproducible between knits.
set.seed(1234)
wordcloud(
  words = d$word, freq = d$freq, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# head() instead of d[1:20, ] so a vocabulary with fewer than 20 matched
# terms does not add all-NA bars to the plot.
top_terms <- head(d, 20)
barplot(
  top_terms$freq, las = 2, names.arg = top_terms$word,
  col = "lightblue", main = "Most frequent words",
  ylab = "Word frequencies"
)

## latent semantic analysis
lsaspace <- lsa(tdmAG, dims = dimcalc_share())
newmatrix <- as.textmatrix(lsaspace)

# For every vocabulary term, list the terms whose cosine similarity in the
# LSA space exceeds 0.5. The threshold must be numeric: passing the string
# "0.5" makes R compare similarities as text rather than as numbers.
associated_words <- lapply(tdm_wordsAG, function(x) {
  associate(newmatrix, x, measure = "cosine", threshold = 0.5)
})
names(associated_words) <- tdm_wordsAG
head(associated_words, 10)

# Pairwise cosine similarities; cos_sim_calc() writes them to
# "cossimdata.txt" (usable in Cytoscape), which is read straight back in.
# NOTE(review): `cos` shadows base::cos as a variable; the name is kept
# because the following chunk reads it.
cos_sim_calc(tdmAG)
cos <- read.table("cossimdata.txt", header = FALSE, sep = "\t")
# write.csv(cos, "cossimdata.csv")
```
```{r plot}
# Network view of the term similarities: build an undirected weighted graph
# from the cosine-similarity table (`cos`, read in the previous chunk),
# detect communities, plot them, and export the graph as GraphML.
library(dplyr)
# Drop rows with missing values from the similarity table.
cos2 <- na.omit(cos)
library(igraph)
library(ggraph)
library(graphlayouts)

# Relabel columns as an edge list: columns 1 and 2 are the two endpoints,
# column 3 the similarity; its absolute value is used as the edge weight.
relations <- data.frame(
  from = cos2[, 1],
  to = cos2[, 2],
  weight = abs(cos2[, 3])
)

# Remove zero-weight edges. A logical filter on `weight` is used because the
# previous negative-index form (`relations[-row(relations)[relations == 0], ]`)
# returned an empty table whenever there was no zero to remove, and it also
# tested the endpoint columns instead of only the weight.
relations2 <- relations[relations$weight != 0, , drop = FALSE]

# Undirected graph
g.1a <- graph_from_data_frame(relations2, directed = FALSE)
# V(g.1a)$size <- 1

# Edge-weight threshold for the optional filtered plot below (renamed from
# `min`, which shadowed base::min).
min_weight <- 0.99

# Give the graph lots of room
# opar <- par()$mar; par(mar = rep(3, 4))
# plot(g.1a, layout = layout1)
# plot(g.1a, layout = layout1,
#      edge.width = ifelse(E(g.1a)$weight >= min_weight, E(g.1a)$weight, NA))

# Walktrap community detection; modularity() prints the modularity score of
# the resulting partition.
wc <- cluster_walktrap(g.1a)
modularity(wc)
# ebc <- cluster_edge_betweenness(g.1a)
# ci <- cluster_infomap(g.1a)

# Two candidate layouts; only the Fruchterman-Reingold one is plotted.
# layout1 <- layout_nicely(g.1a)
layout1 <- layout_with_lgl(g.1a)
# Variant without vertex labels:
# plot(wc, g.1a, layout = layout1, vertex.size = .5, vertex.label = NA)
layout2 <- layout_with_fr(g.1a)
plot(wc, g.1a, layout = layout2) # vertex labels are shown

# Store community membership as the vertex colour so it travels with the
# exported file (e.g. for Gephi).
V(g.1a)$color <- wc$membership
write_graph(g.1a, "athero.graphml", format = "graphml")

# Weighted adjacency matrix and palette for the optional heatmap below.
netm <- as_adjacency_matrix(g.1a, attr = "weight", sparse = FALSE)
palf <- colorRampPalette(c("gold", "dark orange"))
# heatmap(netm[, 17:1], Rowv = NA, Colv = NA, col = palf(100),
#         scale = "none", margins = c(10, 10))
# gephi
```
Note that the chunks above run with `echo = TRUE` (set in the setup chunk), so the R code is printed alongside its output. Add `echo = FALSE` to a chunk header to prevent printing of the R code that generated a plot.