Patch summary (text before the first "diff --git" line is skipped by git apply / patch):
  * rank.cu: calculateCidTotalOutStake() reads user indexes from a plain uint64_t
    array and accumulates in uint64_t; getCompressedInLinks() takes the compressed
    start-index/count arrays as inputs and sums stakes per run of equal inLinksOuts.
  * test_rank.cu: adds tests for calculateCidTotalOutStake(), find_max_ranks_diff()
    and getCompressedInLinks().
Review fixes applied on top of the original patch:
  * test_getCompressedInLinks() passed the host array compressedInLinksCount to the
    kernel; it now passes dev_compressedInLinksCount.
  * test_find_max_ranks_diff() compared doubles with ==; it now uses a tolerance.
  * test_getCompressedInLinks() printed under a copy-pasted label and checked nothing;
    it now compares against `expected` and reports pass/fail.
Notes:
  * Blank context lines were re-derived from the hunk line counts; the one inside the
    -158,14 hunk is a best guess. The index hashes are those of the original patch.
  * Follow-up (not changed here): compressedLinksIndex is uint32_t but is initialised
    from a uint64_t start index.

diff --git a/cosmos/poc/cuda/rank.cu b/cosmos/poc/cuda/rank.cu
index c74b1d22..e1723437 100644
--- a/cosmos/poc/cuda/rank.cu
+++ b/cosmos/poc/cuda/rank.cu
@@ -80,7 +80,7 @@ void calculateCidTotalOutStake(
     uint64_t cidsSize,
     uint64_t *stakes, /*array index - user index*/
     uint64_t *outLinksStartIndex, uint32_t *outLinksCount, /*array index - cid index*/
-    cid_link *allOutLinks, /*all out links from all users*/
+    uint64_t *outLinksUsers, /*all out links from all users*/
     /*returns*/ uint64_t *cidsTotalOutStakes /*array index - cid index*/
 ) {
 
@@ -88,9 +88,9 @@ void calculateCidTotalOutStake(
     uint64_t stride = blockDim.x * gridDim.x;
 
     for (uint64_t i = index; i < cidsSize; i += stride) {
-        double totalOutStake = 0.0;
+        uint64_t totalOutStake = 0;
         for (uint64_t j = outLinksStartIndex[i]; j < outLinksStartIndex[i] + outLinksCount[i]; j++) {
-            totalOutStake += stakes[allOutLinks[j].user_index];
+            totalOutStake += stakes[outLinksUsers[j]];
         }
         cidsTotalOutStakes[i] = totalOutStake;
     }
@@ -144,8 +144,8 @@ void getCompressedInLinks(
     uint64_t *inLinksStartIndex, uint32_t *inLinksCount, uint64_t *cidsTotalOutStakes, /*array index - cid index*/
     uint64_t *inLinksOuts, uint64_t *inLinksUsers, /*all incoming links from all users*/
     uint64_t *stakes, /*array index - user index*/
-    /*returns*/ CompressedInLink *compressedInLinks, /*all incoming compressed links*/
-    /*returns*/ uint64_t *compressedInLinksStartIndex, uint32_t *compressedInLinksCount /*array index - cid index*/
+    uint64_t *compressedInLinksStartIndex, uint32_t *compressedInLinksCount, /*array index - cid index*/
+    /*returns*/ CompressedInLink *compressedInLinks /*all incoming compressed links*/
 ) {
 
     int index = blockIdx.x * blockDim.x + threadIdx.x;
@@ -158,14 +158,31 @@ void getCompressedInLinks(
         }
 
         uint32_t compressedLinksIndex = compressedInLinksStartIndex[i];
-        uint64_t compressedLinkStake = stakes[inLinksUsers[inLinksStartIndex[i]]];
-        for(uint64_t j = inLinksStartIndex[i] + 1; j < inLinksStartIndex[i] + inLinksCount[i]; j++) {
-            if(inLinksOuts[j] != inLinksOuts[j-1]) {
+        // Fast path: a single in-link yields exactly one compressed link.
+        if(inLinksCount[i] == 1) {
+            uint64_t oppositeCid = inLinksOuts[inLinksStartIndex[i]];
+            uint64_t compressedLinkStake = stakes[inLinksUsers[inLinksStartIndex[i]]];
+            double weight = ddiv_rz(&compressedLinkStake, &cidsTotalOutStakes[oppositeCid]);
+            compressedInLinks[compressedLinksIndex] = CompressedInLink {oppositeCid, weight};
+            continue;
+        }
+
+        // Sum the stakes over each run of consecutive in-links that share the same
+        // inLinksOuts value and emit one compressed link per run.
+        uint64_t compressedLinkStake = 0;
+        uint64_t lastLinkIndex = inLinksStartIndex[i] + inLinksCount[i] - 1;
+        for(uint64_t j = inLinksStartIndex[i]; j < lastLinkIndex + 1; j++) {
+
+            compressedLinkStake += stakes[inLinksUsers[j]];
+            // j == lastLinkIndex is tested first, so inLinksOuts[j+1] is only read
+            // while j+1 is still inside this cid's range.
+            if(j == lastLinkIndex || inLinksOuts[j] != inLinksOuts[j+1]) {
 
                 uint64_t oppositeCid = inLinksOuts[j];
                 double weight = ddiv_rz(&compressedLinkStake, &cidsTotalOutStakes[oppositeCid]);
                 compressedInLinks[compressedLinksIndex] = CompressedInLink {oppositeCid, weight};
                 compressedLinksIndex++;
+                compressedLinkStake=0;
             }
         }
     }
diff --git a/cosmos/poc/cuda/test_rank.cu b/cosmos/poc/cuda/test_rank.cu
index a53b8aaf..504a2421 100644
--- a/cosmos/poc/cuda/test_rank.cu
+++ b/cosmos/poc/cuda/test_rank.cu
@@ -32,17 +32,17 @@ void test_getCompressedInLinksCount() {
     uint64_t *dev_inLinksStartIndex;
     uint64_t *dev_inLinksOuts;
 
-    cudaMallocManaged(&dev_inLinksCount, cidsSize*sizeof(uint32_t));
-    cudaMallocManaged(&dev_compressedInLinksCount, cidsSize*sizeof(uint32_t));
-    cudaMallocManaged(&dev_inLinksStartIndex, cidsSize*sizeof(uint64_t));
-    cudaMallocManaged(&dev_inLinksOuts, outSize*sizeof(uint64_t));
+    cudaMalloc(&dev_inLinksCount, cidsSize*sizeof(uint32_t));
+    cudaMalloc(&dev_compressedInLinksCount, cidsSize*sizeof(uint32_t));
+    cudaMalloc(&dev_inLinksStartIndex, cidsSize*sizeof(uint64_t));
+    cudaMalloc(&dev_inLinksOuts, outSize*sizeof(uint64_t));
 
     cudaMemcpy(dev_inLinksCount, inLinksCount, cidsSize*sizeof(uint32_t), cudaMemcpyHostToDevice);
     cudaMemcpy(dev_inLinksStartIndex, inLinksStartIndex, cidsSize*sizeof(uint64_t), cudaMemcpyHostToDevice);
     cudaMemcpy(dev_inLinksOuts, inLinksOuts, outSize*sizeof(uint64_t), cudaMemcpyHostToDevice);
 
     cudaDeviceSynchronize();
-    getCompressedInLinksCount<<<1,6>>>(
+    getCompressedInLinksCount<<<2,3>>>(
         cidsSize,
         dev_inLinksStartIndex, dev_inLinksCount, dev_inLinksOuts,
         dev_compressedInLinksCount
@@ -62,9 +62,173 @@ void test_getCompressedInLinksCount() {
     }
 }
 
-// To run use `nvcc test_rank.cu -o test && ./test` command.
+// Runs calculateCidTotalOutStake() on a small fixture (6 cids, 9 links, 3 users)
+// and compares the per-cid totals with hand-computed values.
+void test_calculateCidTotalOutStake() {
+
+    int cidsSize = 6;
+    int linksSize = 9;
+    int usersSize = 3;
+
+    uint32_t outLinksCount [6] = { 0, 2, 0, 1, 3, 3 };
+    uint64_t outLinksStartIndex [6] = { 0, 0, 2, 2, 3, 6 };
+    uint64_t outLinksUsers [9] = { 1, 0, 2, 0, 2, 1, 2, 1, 0};
+    uint64_t stakes [3] = { 1, 2, 3};
+
+    uint32_t *dev_outLinksCount;
+    uint64_t *dev_outLinksStartIndex;
+    uint64_t *dev_outLinksUsers;
+    uint64_t *dev_stakes;
+    uint64_t *dev_cidsTotalOutStakes;
+
+    cudaMalloc(&dev_outLinksCount, cidsSize*sizeof(uint32_t));
+    cudaMalloc(&dev_outLinksStartIndex, cidsSize*sizeof(uint64_t));
+    cudaMalloc(&dev_outLinksUsers, linksSize*sizeof(uint64_t));
+    cudaMalloc(&dev_stakes, usersSize*sizeof(uint64_t));
+    cudaMalloc(&dev_cidsTotalOutStakes, cidsSize*sizeof(uint64_t));
+
+    cudaMemcpy(dev_outLinksCount, outLinksCount, cidsSize*sizeof(uint32_t), cudaMemcpyHostToDevice);
+    cudaMemcpy(dev_outLinksStartIndex, outLinksStartIndex, cidsSize*sizeof(uint64_t), cudaMemcpyHostToDevice);
+    cudaMemcpy(dev_outLinksUsers, outLinksUsers, linksSize*sizeof(uint64_t), cudaMemcpyHostToDevice);
+    cudaMemcpy(dev_stakes, stakes, usersSize*sizeof(uint64_t), cudaMemcpyHostToDevice);
+
+    cudaDeviceSynchronize();
+    calculateCidTotalOutStake<<<2,3>>>(
+        cidsSize, dev_stakes,
+        dev_outLinksStartIndex, dev_outLinksCount,
+        dev_outLinksUsers, dev_cidsTotalOutStakes
+    );
+    cudaDeviceSynchronize();
+
+    uint64_t actual[6] = {};
+    cudaMemcpy(actual, dev_cidsTotalOutStakes, cidsSize*sizeof(uint64_t), cudaMemcpyDeviceToHost);
+
+    uint64_t expected[6] = {0,3,0,3,6,6};
+    if (std::equal(std::begin(expected), std::end(expected), std::begin(actual)))
+        printf("calculateCidTotalOutStake() works as expected!\n");
+    else {
+        printf("calculateCidTotalOutStake() doesn't works :(\n");
+        for (int i = sizeof(actual) / sizeof(actual[0])-1; i >= 0; i--)
+            std::cout << actual[i] << ' ' << expected[i] << '\n';
+    }
+}
+
+// Runs find_max_ranks_diff() on two 6-element rank vectors and checks the
+// returned value against the expected 2.6482.
+void test_find_max_ranks_diff() {
+
+    double prevRank [6] = { -1.324, 32.1, 0.001, 2.231, -3.22, -0.02 };
+    double newRank [6] = {1.3242, 32.22, 0.032, 2.231, -3.232, 0.02 };
+
+    double *dev_prevRank;
+    double *dev_newRank;
+    cudaMalloc(&dev_prevRank, 6*sizeof(double));
+    cudaMalloc(&dev_newRank, 6*sizeof(double));
+    cudaMemcpy(dev_prevRank, prevRank, 6*sizeof(double), cudaMemcpyHostToDevice);
+    cudaMemcpy(dev_newRank, newRank, 6*sizeof(double), cudaMemcpyHostToDevice);
+
+    double maxDiff = find_max_ranks_diff(dev_prevRank, dev_newRank, 6);
+    // Compare with a tolerance: a floating-point result is not guaranteed to be
+    // bit-identical to the decimal literal 2.6482.
+    double diffError = maxDiff - 2.6482;
+    if (diffError > -1e-9 && diffError < 1e-9)
+        printf("find_max_ranks_diff() works as expected!\n");
+    else {
+        printf("find_max_ranks_diff() doesn't works :(\n");
+        std::cout << maxDiff << ' ' << 2.6482 << '\n';
+    }
+}
+
+// Runs getCompressedInLinks() on a small fixture (8 cids, 11 in-links, 3 users)
+// and compares the 8 compressed links with hand-computed values.
+void test_getCompressedInLinks() {
+
+    int cidsSize = 8;
+    int linksSize = 11;
+    int compressedLinksSize = 8;
+    int usersSize = 3;
+
+    uint32_t inLinksCount [8] = {0,0,1,5,4,0,1,0};
+    uint32_t compressedInLinksCount [8] = {0,0,1,3,3,0,1,0};
+    uint64_t inLinksStartIndex [8] = {0,0,0,1,6,10,10,11};
+    uint64_t compressedInLinksStartIndex [8] = {0,0,0,1,4,7,7,8};
+    uint64_t cidsTotalOutStakes [8] = {3,3,3,1,6,1,0,3};
+    uint64_t inLinksOuts [11] = {7,1,4,4,4,2,5,0,0,1,3};
+    uint64_t inLinksUsers [11] = {0,2,0,1,2,0,1,1,2,1,1};
+    uint64_t stakes [3] = {3,1,2};
+
+    uint64_t *dev_inLinksStartIndex;
+    uint32_t *dev_inLinksCount;
+    uint64_t *dev_cidsTotalOutStakes;
+    uint64_t *dev_inLinksOuts;
+    uint64_t *dev_inLinksUsers;
+    uint64_t *dev_stakes;
+    uint64_t *dev_compressedInLinksStartIndex;
+    uint32_t *dev_compressedInLinksCount;
+    CompressedInLink *dev_compressedInLinks;
+
+    cudaMalloc(&dev_inLinksStartIndex, cidsSize*sizeof(uint64_t));
+    cudaMalloc(&dev_inLinksCount, cidsSize*sizeof(uint32_t));
+    cudaMalloc(&dev_cidsTotalOutStakes, cidsSize*sizeof(uint64_t));
+    cudaMalloc(&dev_inLinksOuts, linksSize*sizeof(uint64_t));
+    cudaMalloc(&dev_inLinksUsers, linksSize*sizeof(uint64_t));
+    cudaMalloc(&dev_stakes, usersSize*sizeof(uint64_t));
+    cudaMalloc(&dev_compressedInLinksStartIndex, cidsSize*sizeof(uint64_t));
+    cudaMalloc(&dev_compressedInLinksCount, cidsSize*sizeof(uint32_t));
+    cudaMalloc(&dev_compressedInLinks, compressedLinksSize*sizeof(CompressedInLink));
+
+    cudaMemcpy(dev_inLinksStartIndex, inLinksStartIndex, cidsSize*sizeof(uint64_t), cudaMemcpyHostToDevice);
+    cudaMemcpy(dev_inLinksCount, inLinksCount, cidsSize*sizeof(uint32_t), cudaMemcpyHostToDevice);
+    cudaMemcpy(dev_cidsTotalOutStakes, cidsTotalOutStakes, cidsSize*sizeof(uint64_t), cudaMemcpyHostToDevice);
+    cudaMemcpy(dev_inLinksOuts, inLinksOuts, linksSize*sizeof(uint64_t), cudaMemcpyHostToDevice);
+    cudaMemcpy(dev_inLinksUsers, inLinksUsers, linksSize*sizeof(uint64_t), cudaMemcpyHostToDevice);
+    cudaMemcpy(dev_stakes, stakes, usersSize*sizeof(uint64_t), cudaMemcpyHostToDevice);
+    cudaMemcpy(dev_compressedInLinksStartIndex, compressedInLinksStartIndex, cidsSize*sizeof(uint64_t), cudaMemcpyHostToDevice);
+    cudaMemcpy(dev_compressedInLinksCount, compressedInLinksCount, cidsSize*sizeof(uint32_t), cudaMemcpyHostToDevice);
+
+    cudaDeviceSynchronize();
+    getCompressedInLinks<<<4,2>>>(
+        cidsSize,
+        dev_inLinksStartIndex, dev_inLinksCount, dev_cidsTotalOutStakes,
+        dev_inLinksOuts, dev_inLinksUsers,
+        dev_stakes,
+        dev_compressedInLinksStartIndex, dev_compressedInLinksCount,
+        dev_compressedInLinks
+    );
+    cudaDeviceSynchronize();
+
+    CompressedInLink actual[8] = {};
+    cudaMemcpy(actual, dev_compressedInLinks, compressedLinksSize*sizeof(CompressedInLink), cudaMemcpyDeviceToHost);
+
+    CompressedInLink expected[8] = {
+        {7,1.0},{1,0.666667},{4,1},{2,1},{5,1},{0,1},{1,0.333333},{3,1}
+    };
+
+    // Weights in `expected` are rounded to 6 decimals, so compare with a tolerance.
+    bool matches = true;
+    for (int i = 0; i < compressedLinksSize; i++) {
+        double weightError = actual[i].weight - expected[i].weight;
+        if (actual[i].fromIndex != expected[i].fromIndex || weightError > 1e-5 || weightError < -1e-5)
+            matches = false;
+    }
+
+    if (matches)
+        printf("getCompressedInLinks() works as expected!\n");
+    else {
+        printf("getCompressedInLinks() doesn't work :(\n");
+        for (int i = sizeof(actual) / sizeof(actual[0])-1; i >= 0; i--) {
+            std::cout << actual[i].fromIndex <<'_'<< actual[i].weight << " ";
+            std::cout << expected[i].fromIndex <<'_'<< expected[i].weight << '\n';
+        }
+    }
+}
+
+// To run use `nvcc test_rank.cu -o test && ./test && rm test` command.
 int main(void) {
     printf("Start testing !!!!!!!!!!!!!!!!!!\n");
     test_getCompressedInLinksStartIndex();
     test_getCompressedInLinksCount();
+    test_calculateCidTotalOutStake();
+    test_find_max_ranks_diff();
+    test_getCompressedInLinks();
 }
\ No newline at end of file