-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathGPUBatchInsertsPerf.cu
96 lines (81 loc) · 3.18 KB
/
GPUBatchInsertsPerf.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <cuda_profiler_api.h>
#include "mt19937ar.h"
#include "CuckooFilter.cu"
#include "graph_test.cu"
#ifndef NOT_FOUND
#define NOT_FOUND UINT_MAX
#endif
/*
 * Fills numberArray[0 .. n-1] with pseudo-random values taken from rand().
 *
 * numberArray: destination buffer; must have room for at least n elements.
 * n:           number of values to write (0 writes nothing).
 *
 * The generator is re-seeded from time(NULL) on every call, so two calls
 * made within the same clock second produce the same sequence.
 */
void generateRandomNumbers(unsigned int *numberArray, unsigned int n)
{
    srand((unsigned int)time(NULL));
    // The index is unsigned to match the count: a signed int index would
    // overflow before reaching counts larger than INT_MAX.
    for (unsigned int i = 0; i < n; i++) {
        numberArray[i] = rand();
    }
}
/*
 * Reports pending CUDA errors from previously issued work.
 *
 * First reads the error recorded by the runtime (e.g. from a kernel launch)
 * with cudaGetLastError(), then blocks in cudaDeviceSynchronize() to collect
 * any error raised while the queued device work executed. Each failure is
 * printed to stdout; the function returns nothing and does not abort.
 */
void CUDAErrorCheck()
{
    // Query each source exactly once. cudaGetLastError() resets the stored
    // error to cudaSuccess, so calling it a second time would overwrite a
    // captured launch failure and hide it from the report below.
    const cudaError_t errSync = cudaGetLastError();
    const cudaError_t errAsync = cudaDeviceSynchronize();
    if (errSync != cudaSuccess)
        printf("Sync kernel error: %s\n", cudaGetErrorString(errSync));
    if (errAsync != cudaSuccess)
        printf("Async kernel error: %s\n", cudaGetErrorString(errAsync));
}
// Prints a diagnostic for a failed CUDA runtime call; returns true when err is cudaSuccess.
static bool cudaCallSucceeded(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

/*
 * Benchmark driver: builds a CuckooFilter on the host, inserts random values,
 * then launches lookUpGPU over a prefix of the inserted values.
 *
 * Command line: <numBuckets> <bucketSize> <fillFraction> <numLookUps>
 *   numBuckets, bucketSize  filter dimensions passed to the CuckooFilter constructor
 *   fillFraction            fraction of numBuckets*bucketSize slots to insert
 *   numLookUps              number of inserted values to look up on the GPU
 *
 * Returns 0 on success, 1 on bad arguments or when a CUDA call fails.
 */
int main(int argc, char* argv[])
{
    // Explicit check rather than assert(): an assert compiles away under
    // NDEBUG, after which argv would be read out of bounds.
    if (argc != 5) {
        fprintf(stderr, "Usage: %s <numBuckets> <bucketSize> <fillFraction> <numLookUps>\n",
                argc > 0 ? argv[0] : "GPUBatchInsertsPerf");
        return 1;
    }

    // Parse into signed values first so negative input is rejected instead of
    // silently wrapping to a huge unsigned count.
    const int numBucketsArg = atoi(argv[1]);
    const int bucketSizeArg = atoi(argv[2]);
    const float fillFraction = (float)atof(argv[3]);
    const int numLookUpsArg = atoi(argv[4]);
    if (numBucketsArg <= 0 || bucketSizeArg <= 0 || !(fillFraction > 0.0f) || numLookUpsArg < 0) {
        fprintf(stderr, "numBuckets, bucketSize and fillFraction must be positive; numLookUps must be non-negative\n");
        return 1;
    }
    const unsigned int numBuckets = (unsigned int)numBucketsArg;
    const unsigned int bucketSize = (unsigned int)bucketSizeArg;
    unsigned int numLookUps = (unsigned int)numLookUpsArg;

    // Number of values to insert. The slot count is formed in size_t so the
    // product cannot wrap around in 32-bit unsigned arithmetic.
    const size_t capacity = (size_t)numBuckets * bucketSize;
    const float requestedInserts = floor((float)capacity * fillFraction);
    if (!(requestedInserts >= 1.0f) || requestedInserts >= (float)INT_MAX) {
        fprintf(stderr, "numBuckets*bucketSize*fillFraction must yield between 1 and INT_MAX inserts\n");
        return 1;
    }
    const int insertSize = (int)requestedInserts;

    // Generate the random values and insert them into the host-side filter.
    unsigned int* h_insertValues = new unsigned int[insertSize];
    generateRandomNumbers(h_insertValues, insertSize);
    CuckooFilter * ckFilter = new CuckooFilter(numBuckets, bucketSize);
    insert((int *)h_insertValues, insertSize, numBuckets, bucketSize, ckFilter);

    // Lookup values are the first numLookUps inserted values, so the request
    // cannot exceed what was inserted without reading past h_insertValues.
    if (numLookUps > (unsigned int)insertSize) {
        fprintf(stderr, "numLookUps (%u) exceeds the %d inserted values; clamping\n",
                numLookUps, insertSize);
        numLookUps = (unsigned int)insertSize;
    }
    const size_t lookUpBytes = (size_t)numLookUps * sizeof(unsigned int);
    const size_t resultBytes = (size_t)numLookUps * sizeof(char);

    unsigned int * d_lookUpValues = NULL;
    char * d_results = NULL;
    char * h_results = new char[numLookUps];
    bool ok = true;

    // A zero-sized request would produce a launch with an empty grid, which
    // the runtime rejects, so the whole device phase is skipped in that case.
    if (numLookUps > 0) {
        // Pass the buffers themselves to the copy/set calls, not the addresses
        // of the pointer variables that hold them.
        ok = cudaCallSucceeded(cudaMalloc((void**) &d_lookUpValues, lookUpBytes),
                               "cudaMalloc(d_lookUpValues)");
        ok = ok && cudaCallSucceeded(cudaMemcpy(d_lookUpValues, h_insertValues, lookUpBytes,
                                                cudaMemcpyHostToDevice),
                                     "cudaMemcpy(host -> d_lookUpValues)");
        // Output array, one byte per lookup, zeroed before the kernel runs.
        ok = ok && cudaCallSucceeded(cudaMalloc((void**) &d_results, resultBytes),
                                     "cudaMalloc(d_results)");
        ok = ok && cudaCallSucceeded(cudaMemset(d_results, 0, resultBytes),
                                     "cudaMemset(d_results)");

        if (ok) {
            // NOTE(review): d_ckFilter is not released explicitly here; its
            // ownership is defined by cudaMallocAndCpy, which is outside this
            // file's view. cudaDeviceReset() below tears down device state.
            CuckooFilter * d_ckFilter = (CuckooFilter *) cudaMallocAndCpy(sizeof(CuckooFilter), ckFilter);

            // Event-based timing and profiler start/stop around this launch
            // are currently disabled.
            std::cout << "Calling lookup kernel" << std::endl;
            lookUpGPU<<<(numLookUps + 1023)/1024, 1024>>>(d_ckFilter, numLookUps, d_lookUpValues, d_results);
            // Launch-configuration errors surface through cudaGetLastError();
            // faults during execution surface at the synchronizing call.
            ok = cudaCallSucceeded(cudaGetLastError(), "lookUpGPU launch")
                 && cudaCallSucceeded(cudaDeviceSynchronize(), "lookUpGPU execution");
        }

        ok = ok && cudaCallSucceeded(cudaMemcpy(h_results, d_results, resultBytes,
                                                cudaMemcpyDeviceToHost),
                                     "cudaMemcpy(d_results -> host)");
    }

    // Free memory. cudaFree(NULL) is a no-op, so this is safe on error paths.
    ckFilter->freeFilter();
    delete[] h_insertValues;
    cudaFree(d_lookUpValues);
    cudaFree(d_results);
    delete[] h_results;
    cudaDeviceReset();
    return ok ? 0 : 1;
}