-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRunCodonMatrix.R
75 lines (61 loc) · 1.91 KB
/
RunCodonMatrix.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
library(Biostrings)
source("Scripts/DataAnalysis.R")
source("Scripts/Sequences.R")
#' Input: a directory that contains only the mutated FASTA files derived from
#' the original sequence set. Every file in it is read and one 64x64 table is
#' generated per file.
path <- "../BA Circular Code/Workspace/" # change here
# `pattern` is a regular expression, not a shell glob: anchor on the literal
# ".fasta" extension so that only FASTA files are listed.
fastafile <- list.files(path, pattern = "\\.fasta$")
dnaf <- readDNAStringSet("cds/ena-ch-reinhardtii.fasta") # change here
# dnaf = dnaf[1:1010] # change here for deleting IUPAC Codes
# dnaf = deleteIUPACSequences(dnaf)

# Frame-shifted copies of the reference set, built with changeReadingFrame().
# NOTE(review): only the first 1000 sequences are shifted; any further entries
# of dnaf1/dnaf2 stay identical to dnaf -- confirm this cap is intended.
# seq_len() keeps the loops from running when dnaf is empty (1:0 would
# iterate over c(1, 0)).
nShifted <- min(length(dnaf), 1000)

dnaf1 <- dnaf # Frame 1
for (j in seq_len(nShifted)) {
  dnaf1[[j]] <- changeReadingFrame(1, dnaf[[j]])
}

dnaf2 <- dnaf # Frame 2
for (j in seq_len(nShifted)) {
  dnaf2[[j]] <- changeReadingFrame(2, dnaf[[j]])
}
# For every mutated FASTA file: build a 64x64 table by passing each mutated
# sequence together with the reference sequence at the same index to
# codonCount(), then store the table as an RDS file under "Workspace/".
# File names are split on "_" into <seqName>_<code>_<frame>.fasta, where
# <frame> must be 0, 1 or 2 and selects dnaf, dnaf1 or dnaf2 as the reference.
for (h in seq_along(fastafile)) {
  print(fastafile[h])

  tmp <- unlist(strsplit(fastafile[h], "_"))
  seqName <- tmp[1]
  code <- tmp[2]
  # Strip the literal ".fasta" extension (an unescaped "." would match any
  # character).
  frame <- sub("\\.fasta$", "", tmp[3])
  frameNum <- suppressWarnings(as.numeric(frame))

  # Skip files whose frame cannot be resolved instead of saving a stale or
  # undefined matrix for them.
  if (is.na(frameNum) || !(frameNum %in% 0:2)) {
    warning(
      "Skipping '", fastafile[h], "': frame must be 0, 1 or 2",
      call. = FALSE, immediate. = TRUE
    )
    next
  }

  # Pick the reference set that matches the frame of the mutated file.
  reference <- switch(as.character(frameNum),
    "0" = dnaf,
    "1" = dnaf1,
    "2" = dnaf2
  )

  ar <- generateEmptyTable(64, 64, CODONS)
  dnafmod <- readDNAStringSet(paste0(path, fastafile[h]))
  nSeq <- length(dnafmod)

  # The running table is handed to codonCount() and replaced by its result.
  # seq_len() makes an empty file yield the untouched empty table.
  for (i in seq_len(nSeq)) {
    ar <- codonCount(ar, reference[[i]], dnafmod[[i]])
    print(paste("Done with sequence", i, "/", nSeq))
  }

  # Kept under its original name so code that inspects it afterwards still
  # finds the most recent matrix.
  outputMatrix_codons <- ar

  saveRDS(
    object = outputMatrix_codons,
    file = paste0("Workspace/", seqName, "_", code, "_", frame, ".RDS")
  )
}