-
Notifications
You must be signed in to change notification settings - Fork 12
/
main.cu
230 lines (191 loc) · 8.07 KB
/
main.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
#include "includes/CompFab.h"
#include "math.h"
#include "curand.h"
#include "curand_kernel.h"
#include "includes/cuda_math.h"
#include <iostream>
#include <string>
#include <sstream>
#include "stdio.h"
#include <vector>
#define RANDOM_SEEDS 1000
#define EPSILONF 0.000001
#define E_PI 3.1415926535897932384626433832795028841971693993751058209749445923078164062
// check cuda calls for errors
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
// generates a random float between 0 and 1
__device__ float generate( curandState* globalState , int ind)
{
curandState localState = globalState[ind];
float RANDOM = curand_uniform( &localState );
globalState[ind] = localState;
return RANDOM;
}
// set up random seed buffer
__global__ void setup_kernel ( curandState * state, unsigned long seed )
{
int id = threadIdx.x;
curand_init ( seed, id, 0, &state[id] );
}
__device__ bool inside(unsigned int numIntersections, bool double_thick) {
// if (double_thick && numIntersections % 2 == 0) return (numIntersections / 2) % 2 == 1;
if (double_thick) return (numIntersections / 2) % 2 == 1;
return numIntersections % 2 == 1;
}
// adapted from: https://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm
__device__ bool intersects(CompFab::Triangle &triangle, float3 dir, float3 pos) {
float3 V1 = {triangle.m_v1.m_x, triangle.m_v1.m_y, triangle.m_v1.m_z};
float3 V2 = {triangle.m_v2.m_x, triangle.m_v2.m_y, triangle.m_v2.m_z};
float3 V3 = {triangle.m_v3.m_x, triangle.m_v3.m_y, triangle.m_v3.m_z};
//Find vectors for two edges sharing V1
float3 e1 = V2 - V1;
float3 e2 = V3 - V1;
// //Begin calculating determinant - also used to calculate u parameter
float3 P = cross(dir, e2);
//if determinant is near zero, ray lies in plane of triangle
float det = dot(e1, P);
//NOT CULLING
if(det > -EPSILONF && det < EPSILONF) return false;
float inv_det = 1.f / det;
// calculate distance from V1 to ray origin
float3 T = pos - V1;
//Calculate u parameter and test bound
float u = dot(T, P) * inv_det;
//The intersection lies outside of the triangle
if(u < 0.f || u > 1.f) return false;
//Prepare to test v parameter
float3 Q = cross(T, e1);
//Calculate V parameter and test bound
float v = dot(dir, Q) * inv_det;
//The intersection lies outside of the triangle
if(v < 0.f || u + v > 1.f) return false;
float t = dot(e2, Q) * inv_det;
if(t > EPSILONF) { // ray intersection
return true;
}
// No hit, no win
return false;
}
// Decides whether or not each voxel is within the given mesh
__global__ void voxelize_kernel(
bool* R, CompFab::Triangle* triangles, const int numTriangles,
const float spacing, const float3 bottom_left,
const int w, const int h, const int d, bool double_thick)
{
// find the position of the voxel
unsigned int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
unsigned int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
unsigned int zIndex = blockDim.z * blockIdx.z + threadIdx.z;
// pick an arbitrary sampling direction
float3 dir = make_float3(1.0, 0.0, 0.0);
if ( (xIndex < w) && (yIndex < h) && (zIndex < d) )
{
// find linearlized index in final boolean array
unsigned int index_out = zIndex*(w*h)+yIndex*h + xIndex;
// find world space position of the voxel
float3 pos = make_float3(bottom_left.x + spacing*xIndex,bottom_left.y + spacing*yIndex,bottom_left.z + spacing*zIndex);
// check if the voxel is inside of the mesh.
// if it is inside, then there should be an odd number of
// intersections with the surrounding mesh
unsigned int intersections = 0;
for (int i = 0; i < numTriangles; ++i)
if (intersects(triangles[i], dir, pos))
intersections += 1;
// store answer
R[index_out] = inside(intersections, double_thick);
}
}
// Decides whether or not each voxel is within the given partially un-closed mesh
// checks a variety of directions and picks most common belief
__global__ void voxelize_kernel_open_mesh(
// triangles of the mesh being voxelized
bool* R, CompFab::Triangle* triangles, const int numTriangles,
// information about how large the samples are and where they begin
const float spacing, const float3 bottom_left,
// number of voxels
const int w, const int h, const int d,
// sampling information for multiple intersection rays
const int samples, curandState* globalState, bool double_thick
)
{
// find the position of the voxel
unsigned int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
unsigned int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
unsigned int zIndex = blockDim.z * blockIdx.z + threadIdx.z;
if ( (xIndex < w) && (yIndex < h) && (zIndex < d) )
{
// find linearlized index in final boolean array
unsigned int index_out = zIndex*(w*h)+yIndex*h + xIndex;
// find world space position of the voxel
float3 pos = make_float3(bottom_left.x + spacing*xIndex,bottom_left.y + spacing*yIndex,bottom_left.z + spacing*zIndex);
float3 dir;
// we will randomly sample 3D space by sending rays in randomized directions
int votes = 0;
float theta;
float z;
for (int j = 0; j < samples; ++j)
{
// compute the random direction. Convert from polar to euclidean to get an even distribution
theta = generate(globalState, index_out % RANDOM_SEEDS) * 2.f * E_PI;
z = generate(globalState, index_out % RANDOM_SEEDS) * 2.f - 1.f;
dir.x = sqrt(1-z*z) * cosf(theta);
dir.y = sqrt(1-z*z) * sinf(theta);
dir.z = sqrt(1-z*z) * cosf(theta);
// check if the voxel is inside of the mesh.
// if it is inside, then there should be an odd number of
// intersections with the surrounding mesh
unsigned int intersections = 0;
for (int i = 0; i < numTriangles; ++i)
if (intersects(triangles[i], dir, pos))
intersections += 1;
if (inside(intersections, double_thick)) votes += 1;
}
// choose the most popular answer from all of the randomized samples
R[index_out] = votes > (samples / 2.f);
}
}
// voxelize the given mesh with the given resolution and dimensions
void kernel_wrapper(int samples, int w, int h, int d, CompFab::VoxelGrid *g_voxelGrid, std::vector<CompFab::Triangle> triangles, bool double_thick)
{
int blocksInX = (w+8-1)/8;
int blocksInY = (h+8-1)/8;
int blocksInZ = (d+8-1)/8;
dim3 Dg(blocksInX, blocksInY, blocksInZ);
dim3 Db(8, 8, 8);
curandState* devStates;
if (samples > 0) {
// set up random numbers
dim3 tpb(RANDOM_SEEDS,1,1);
cudaMalloc ( &devStates, RANDOM_SEEDS*sizeof( curandState ) );
// setup seeds
setup_kernel <<< 1, tpb >>> ( devStates, time(NULL) );
}
// set up boolean array on the GPU
bool *gpu_inside_array;
gpuErrchk( cudaMalloc( (void **)&gpu_inside_array, sizeof(bool) * w * h * d ) );
gpuErrchk( cudaMemcpy( gpu_inside_array, g_voxelGrid->m_insideArray, sizeof(bool) * w * h * d, cudaMemcpyHostToDevice ) );
// set up triangle array on the GPU
CompFab::Triangle* triangle_array = &triangles[0];
CompFab::Triangle* gpu_triangle_array;
gpuErrchk( cudaMalloc( (void **)&gpu_triangle_array, sizeof(CompFab::Triangle) * triangles.size() ) );
gpuErrchk( cudaMemcpy( gpu_triangle_array, triangle_array, sizeof(CompFab::Triangle) * triangles.size(), cudaMemcpyHostToDevice ) );
float3 lower_left = make_float3(g_voxelGrid->m_lowerLeft.m_x, g_voxelGrid->m_lowerLeft.m_y, g_voxelGrid->m_lowerLeft.m_z);
if (samples > 0) {
voxelize_kernel_open_mesh<<<Dg, Db>>>(gpu_inside_array, gpu_triangle_array, triangles.size(), (float) g_voxelGrid->m_spacing, lower_left, w, h, d, samples, devStates, double_thick);
} else {
voxelize_kernel<<<Dg, Db>>>(gpu_inside_array, gpu_triangle_array, triangles.size(), (float) g_voxelGrid->m_spacing, lower_left, w, h, d, double_thick);
}
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
gpuErrchk( cudaMemcpy( g_voxelGrid->m_insideArray, gpu_inside_array, sizeof(bool) * w * h * d, cudaMemcpyDeviceToHost ) );
gpuErrchk( cudaFree(gpu_inside_array) );
gpuErrchk( cudaFree(gpu_triangle_array) );
}