-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfasta.d
168 lines (146 loc) · 5.2 KB
/
fasta.d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
// The Computer Language Benchmarks Game
// http://benchmarkgame.alioth.debian.org
//
// Fasta benchmark
// http://benchmarksgame.alioth.debian.org/u32/performance.php?test=fasta
//
// Heavily derived from C++ g++ #3 and thus
//
// converted to C++ from D by Rafal Rusin
// modified by Vaclav Haisman
// modified by The Anh to compile with g++ 4.3.2
// modified by Branamir Maksimovic
// modified by Kim Walisch
// modified by Tavis Bohne
// converted back to D from C++ by Clark Gredoña
//
// compiles with gdc -w -O3 -frelease -finline-functions -fno-bounds-check -app.d
enum int MAX_LINE_WIDTH = 60;
import std.algorithm;
import std.conv;
import std.range;
import std.stdint;
import std.stdio;
import std.typecons;
char[] alu =
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA"
"TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT"
"AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG"
"GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG"
"CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA".dup;
alias IUB = Tuple!(float, "probability", char, "c");
// we use this separate array of probabilities in order to sum by using reduce
float[15] iubProbabilities = [0.27f, 0.12f, 0.12f, 0.27f, 0.02f, 0.02f, 0.02f,
0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f,
0.02f];
// the probabilities here will be replaced by the cumulative probabilities
auto iub = [IUB(0.00f, 'a'), IUB(0.00f, 'c'), IUB(0.00f, 'g'), IUB(0.00f, 't'),
IUB(0.00f, 'B'), IUB(0.00f, 'D'), IUB(0.00f, 'H'), IUB(0.00f, 'K'),
IUB(0.00f, 'M'), IUB(0.00f, 'N'), IUB(0.00f, 'R'), IUB(0.00f, 'S'),
IUB(0.00f, 'V'), IUB(0.00f, 'W'), IUB(0.00f, 'Y')];
float[4] homosapienProbs = [0.3029549426680f,
0.1979883004921f,
0.1975473066391f,
0.3015094502008f];
auto homosapiens = [IUB(0.00f, 'a'), IUB(0.00f, 'c'),
IUB(0.00f, 'g'), IUB(0.00f, 't')];
float genRandom(in float max = 1.0f)
{
static immutable int IM = 139968, IA = 3877, IC = 29573;
static int last = 42;
last = (last * IA + IC) % IM;
return max * last * (1.0f / IM);
}
// No need for Tuples when just repeating the same char sequence without use
// of probability.
template CharRangeTemplate(Range)
{
class RepeatFunctorType {
public this(Range range) {
this.range = cycle(range);
}
public char opCall()
{
// would use moveFront but it doesn't actually remove the element
// like it's supposed to
auto result = to!char (range.front);
range = range.drop(1);
return result;
}
private Cycle!(Range) range;
}
}
template TupleRangeTemplate(Range)
{
class RandomFunctorType {
public this(Range range) {
this.range = range;
}
public char opCall()
{
immutable float p = genRandom(1.0f);
auto tuple = (find !"a.probability >= b" (range, p)).front;
return tuple.c;
}
private Range range;
}
}
template FunctorTemplate(F) {
/// Prints in FASTA format, building char by char according to F.
public void make(int charsRemaining, F functor)
{
char line[MAX_LINE_WIDTH];
while (charsRemaining > 0)
{
auto currLineWidth = charsRemaining;
if (currLineWidth > MAX_LINE_WIDTH)
{
currLineWidth = MAX_LINE_WIDTH;
}
for (auto i = 0; i < currLineWidth; ++i)
{
line[i] = functor();
}
writeln(line[0..currLineWidth]);
charsRemaining -= currLineWidth;
}
}
}
void main(string[] args)
{
auto n = 1000;
if (args.length < 2 || (n = to!int (args[1])) < 0)
{
writeln("Usage: ", args[0], " LENGTH");
return;
}
// convert expected probability of selecting each nucleotide into
// cumulative probabilities
auto arrLength = iubProbabilities.length; // cache length on stack
for (auto i = 0; i < arrLength; i++)
{
iub[i][0] = reduce!((a, b) => a + b)(iubProbabilities[0..i + 1]);
}
arrLength = homosapienProbs.length;
float[4] cumulativeHomosapienProbs;
for (auto i = 0; i < arrLength; i++)
{
homosapiens[i][0] = reduce!((a, b) => a + b)(homosapienProbs[0..i + 1]);
}
writeln(">ONE Homo sapiens alu");
alias charRangeTemplateInst = CharRangeTemplate!(char[]);
auto repeatFunctor =
new charRangeTemplateInst.RepeatFunctorType(alu);
alias funTemplateInst =
FunctorTemplate!(charRangeTemplateInst.RepeatFunctorType);
funTemplateInst.make(n * 2, repeatFunctor);
writeln(">TWO IUB ambiguity codes");
alias iubRangeTemplateInst = TupleRangeTemplate!(typeof(iub));
alias randFunTemplateInst =
FunctorTemplate!(iubRangeTemplateInst.RandomFunctorType);
auto randomFunctor = new iubRangeTemplateInst.RandomFunctorType(iub);
randFunTemplateInst.make(n * 3, randomFunctor);
writeln(">THREE Homo sapiens frequency");
randomFunctor = new iubRangeTemplateInst.RandomFunctorType(homosapiens);
randFunTemplateInst.make(n * 5, randomFunctor);
}