-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathhost_reductions_kernel_cuda.hpp
52 lines (46 loc) · 1.41 KB
/
host_reductions_kernel_cuda.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#ifndef __HOST_REDUCTIONS_KERNEL_CUDA_INC
#define __HOST_REDUCTIONS_KERNEL_CUDA_INC
#include "cuda_common.hpp"
#include "kernel_files/reductions_kernel.cuknl"
/*
 *  Host-side driver for the `reduction` kernel: repeatedly launches the
 *  kernel over a buffer, then copies the first element of that buffer back
 *  to the caller.
 *
 *  T is the element type of the buffer. `buffer` is handed to the kernel and
 *  is the source of a cudaMemcpyDeviceToHost copy, so it has to be a device
 *  pointer; `result` is the destination of that copy and receives exactly
 *  one T.
 *
 *  NOTE(review): the kernel receives `buffer` as a non-const pointer and the
 *  same buffer is re-read on every pass, so presumably its contents are
 *  overwritten with partial results -- confirm against the kernel source.
 */
template<typename T>
class ReduceToHost
{
private:
    /*
     *  Runs passes of reduction<T, OP> until at most one element is left.
     *
     *  Each pass launches ceil(len / BLOCK_SZ) blocks of BLOCK_SZ threads and
     *  then uses that block count as the length for the next pass. When the
     *  loop finishes, sizeof(T) bytes are copied from the start of `buffer`
     *  into `result`.
     *
     *  reduction_type  selects the kernel instantiation (RED_SUM, RED_MAX or
     *                  RED_MIN). Any other value launches no kernel at all;
     *                  the length still shrinks, so the loop ends and
     *                  buffer[0] is copied back as it was.
     *  buffer          device pointer to the values being reduced.
     *  result          receives one T copied from buffer[0].
     *  len             number of elements to reduce. For len <= 1 no kernel
     *                  runs and buffer[0] is still copied, so the buffer must
     *                  hold at least one element.
     */
    inline static void reduce
    (const REDUCTION_TYPE reduction_type, T* buffer, T* result, int len)
    {
        const int threads_per_block = static_cast<int>(BLOCK_SZ);

        while (len > 1)
        {
            // Integer ceiling division. len >= 2 inside the loop, so
            // (len - 1) is positive and the expression cannot overflow the
            // way (len + threads - 1) could for very large lengths.
            const int num_blocks = (len - 1) / threads_per_block + 1;

            switch (reduction_type)
            {
            case RED_SUM:
                reduction<T, RED_SUM><<<num_blocks, BLOCK_SZ>>>(len, buffer);
                break;
            case RED_MAX:
                reduction<T, RED_MAX><<<num_blocks, BLOCK_SZ>>>(len, buffer);
                break;
            case RED_MIN:
                reduction<T, RED_MIN><<<num_blocks, BLOCK_SZ>>>(len, buffer);
                break;
            }

            // The next pass works on one value per block of this pass.
            len = num_blocks;
        }

        // Check the kernel launches before trusting the buffer contents.
        CUDA_ERR_CHECK;

        cudaMemcpy(result, buffer, sizeof(T), cudaMemcpyDeviceToHost);

        // The copy's own status was previously ignored. Re-run the project
        // check so a failed transfer is reported instead of leaving *result
        // stale. Presumably CUDA_ERR_CHECK inspects the last runtime error
        // (it is defined in cuda_common.hpp) -- confirm.
        CUDA_ERR_CHECK;
    }

public:
    /* Reduces `len` elements of `buffer` with RED_SUM and stores one T in `result`. */
    inline static void sum (T* buffer, T* result, int len)
    {
        reduce(RED_SUM, buffer, result, len);
    }

    /* Reduces `len` elements of `buffer` with RED_MAX and stores one T in `result`. */
    inline static void max_element (T* buffer, T* result, int len)
    {
        reduce(RED_MAX, buffer, result, len);
    }

    /* Reduces `len` elements of `buffer` with RED_MIN and stores one T in `result`. */
    inline static void min_element (T* buffer, T* result, int len)
    {
        reduce(RED_MIN, buffer, result, len);
    }
};
#endif //__HOST_REDUCTIONS_KERNEL_CUDA_INC