-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAlgorithms.cu
232 lines (183 loc) · 5.77 KB
/
Algorithms.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
#include "Algorithms.h"
#include "Algorithms.cuh"
#include "ExpManager.h"
#include "ThreefryGPU.h"
#include "GPUDna.cuh"
#include <cstdint>
#include <stdio.h>
#include <unistd.h>
#include <iostream>
#include<cuda.h>
#include<cuda_profiler_api.h>
using namespace std;
#define DEBUG 1
// Convenience function for checking CUDA runtime API results
// can be wrapped around any runtime API call. No-op in release builds.
inline
cudaError_t checkCuda(cudaError_t result)
{
#if defined(DEBUG) || defined(_DEBUG)
if (result != cudaSuccess) {
fprintf(stderr, "CUDA Runtime Error: %s\n",
cudaGetErrorString(result));
assert(result == cudaSuccess);
}
#endif
return result;
}
constexpr int32_t PROMOTER_ARRAY_SIZE = 10000;
/**
* Function to transfer data from CPU to GPU
*
* @param exp_m
* @param first_gen
*/
void transfer_in(ExpManager* exp_m, bool first_gen) {
exp_m->rng_->initDevice();
checkCuda(cudaMalloc((void**) &gpu_counters,
exp_m->rng_->counters().size() *
sizeof(unsigned long long)));
checkCuda(cudaMemcpy(gpu_counters, exp_m->rng_->counters().data(),
exp_m->rng_->counters().size() *
sizeof(unsigned long long), cudaMemcpyHostToDevice));
// TO COMPLETE
}
__device__ int32_t Threefry::Device::roulette_random(double* probs, int32_t nb_elts)
{
double pick_one = 0.0;
while (pick_one == 0.0)
{
pick_one = randomDouble();
}
int32_t found_org = 0;
pick_one -= probs[0];
while (pick_one > 0)
{
assert(found_org<nb_elts-1);
pick_one -= probs[++found_org];
}
return found_org;
}
__constant__ double cof[6] = { 76.18009172947146,
-86.50532032941677,
24.01409824083091,
-1.231739572450155,
0.1208650973866179e-2,
-0.5395239384953e-5 };
// Returns the value ln[gamma(X)] for X.
// The gamma function is defined by the integral gamma(z) = int(0, +inf, t^(z-1).e^(-t)dt).
// When the argument z is an integer, the gamma function is just the familiar factorial
// function, but offset by one, n! = gamma(n + 1).
__device__ static double gammln(double X)
{
double x, y, tmp, ser;
y = x = X;
tmp = x + 5.5;
tmp -= (x+0.5) * log(tmp);
ser = 1.000000000190015;
for (int8_t j = 0 ; j <= 5 ; j++)
{
ser += cof[j] / ++y;
}
return -tmp + log(2.5066282746310005 * ser / x);
}
__device__
int32_t Threefry::Device::binomial_random(int32_t nb_drawings, double prob)
{
int32_t nb_success;
// The binomial distribution is invariant under changing
// ProbSuccess to 1-ProbSuccess, if we also change the answer to
// NbTrials minus itself; we ll remember to do this below.
double p;
if (prob <= 0.5) p = prob;
else p = 1.0 - prob;
// mean of the deviate to be produced
double mean = nb_drawings * p;
if (nb_drawings < 25)
// Use the direct method while NbTrials is not too large.
// This can require up to 25 calls to the uniform random.
{
nb_success = 0;
for (int32_t j = 1 ; j <= nb_drawings ; j++)
{
if (randomDouble() < p) nb_success++;
}
}
else if (mean < 1.0)
// If fewer than one event is expected out of 25 or more trials,
// then the distribution is quite accurately Poisson. Use direct Poisson method.
{
double g = exp(-mean);
double t = 1.0;
int32_t j;
for (j = 0; j <= nb_drawings ; j++)
{
t = t * randomDouble();
if (t < g) break;
}
if (j <= nb_drawings) nb_success = j;
else nb_success = nb_drawings;
}
else
// Use the rejection method.
{
double en = nb_drawings;
double oldg = gammln(en + 1.0);
double pc = 1.0 - p;
double plog = log(p);
double pclog = log(pc);
// rejection method with a Lorentzian comparison function.
double sq = sqrt(2.0 * mean * pc);
double angle, y, em, t;
do
{
do
{
angle = M_PI * randomDouble();
y = tan(angle);
em = sq*y + mean;
} while (em < 0.0 || em >= (en + 1.0)); // Reject.
em = floor(em); // Trick for integer-valued distribution.
t = 1.2 * sq * (1.0 + y*y)
* exp(oldg - gammln(em + 1.0) - gammln(en - em + 1.0) + em * plog + (en - em) * pclog);
} while (randomDouble() > t); // Reject. This happens about 1.5 times per deviate, on average.
nb_success = (int32_t) rint(em);
}
// Undo the symmetry transformation.
if (p != prob) nb_success = nb_drawings - nb_success;
return nb_success;
}
__device__ static int mod(int a, int b)
{
assert(b > 0);
while (a < 0) a += b;
while (a >= b) a -= b;
return a;
}
/**
* Run a step on the GPU
* @param nb_indiv
* @param w_max
* @param selection_pressure
* @param grid_width
* @param grid_height
* @param mutation_rate
*/
void run_a_step_on_GPU(int nb_indiv, double w_max, double selection_pressure, int grid_width, int grid_height, double mutation_rate) {
}
/**
* Reallocate some data structures if needed
* @param nb_indiv
*/
void allocate_next_gen(int nb_indiv) {
}
/**
PRNG usage:
* For selection
Threefry::Device rng(gpu_counters,indiv_id,Threefry::Phase::REPROD,nb_indiv);
int found_org = rng.roulette_random(probs, NEIGHBORHOOD_SIZE);
* For mutation:
Threefry::Device rng(gpu_counters,indiv_id,Threefry::Phase::MUTATION,nb_indivs);
rng.binomial_random(prev_gen_size, mutation_r);
rng.random( number );
**/