Skip to content

Commit

Permalink
#74 Make gpu and cpu calculation consistency
Browse files Browse the repository at this point in the history
  • Loading branch information
hleb-albau committed Nov 16, 2018
1 parent e54682d commit a8e8eb9
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 84 deletions.
5 changes: 3 additions & 2 deletions cosmos/poc/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
cmn "github.com/tendermint/tendermint/libs/common"
dbm "github.com/tendermint/tendermint/libs/db"
"github.com/tendermint/tendermint/libs/log"
"math"
"os"
"time"
)
Expand Down Expand Up @@ -146,8 +147,8 @@ func (app *CyberdApp) EndBlocker(ctx sdk.Context, _ abci.RequestEndBlock) abci.R
app.BaseApp.Logger.Info("Rank calculated", "steps", steps, "time", time.Since(start))

rankAsBytes := make([]byte, 8*len(newRank))
for i, ui64 := range newRank {
binary.LittleEndian.PutUint64(rankAsBytes[i*8:i*8+8], ui64)
for i, f64 := range newRank {
binary.LittleEndian.PutUint64(rankAsBytes[i*8:i*8+8], math.Float64bits(f64))
}

hash := sha256.Sum256(rankAsBytes)
Expand Down
86 changes: 37 additions & 49 deletions cosmos/poc/app/rank/calculate.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,34 +6,40 @@ import (
)

// Parameters of the float64 rank iteration.
const (
	// d is the damping factor: each iteration scales the accumulated
	// in-link contribution by d and hands out the remaining (1 - d) evenly.
	d float64 = 0.85
	// tolerance is the convergence threshold; iteration continues while the
	// change between two consecutive rank vectors is greater than this value.
	tolerance float64 = 1e-3
)

func CalculateRank(data *InMemoryStorage) ([]uint64, int) {
func CalculateRank(data *InMemoryStorage) ([]float64, int) {

size := data.GetCidsCount()
inLinks := data.GetInLinks()

size := data.GetCidsCount()
if size == 0 {
return []uint64{}, 0
return []float64{}, 0
}

prevrank := make([]uint64, size)
rank := make([]float64, size)
defaultRank := (1.0 - d) / float64(size)
danglingNodesSize := uint64(0)

tOverSize := (one - d) / size
danglingNodes := calculateDanglingNodes(data)

for _, i := range danglingNodes {
prevrank[i] = tOverSize
for i := range rank {
rank[i] = defaultRank
if len(inLinks[CidNumber(i)]) == 0 {
danglingNodesSize++
}
}

change := 2 * one
innerProductOverSize := defaultRank * (float64(danglingNodesSize) / float64(size))
defaultRankWithCorrection := float64(d*innerProductOverSize) + defaultRank

change := tolerance + 1

steps := 0
var rank []uint64
prevrank := make([]float64, 0)
prevrank = append(prevrank, rank...)
for change > tolerance {
rank = step(tOverSize, prevrank, danglingNodes, data)
rank = step(defaultRankWithCorrection, prevrank, data)
change = calculateChange(prevrank, rank)
prevrank = rank
steps++
Expand All @@ -42,32 +48,9 @@ func CalculateRank(data *InMemoryStorage) ([]uint64, int) {
return rank, steps
}

func calculateDanglingNodes(data *InMemoryStorage) []int64 {

cidsCount := data.GetCidsCount()
outLinks := data.GetInLinks()
danglingNodes := make([]int64, 0)
func step(defaultRankWithCorrection float64, prevrank []float64, data *InMemoryStorage) []float64 {

i := uint64(0)
for i < cidsCount {
if len(outLinks[CidNumber(i)]) == 0 {
danglingNodes = append(danglingNodes, int64(i))
}
i++
}

return danglingNodes
}

func step(tOverSize uint64, prevrank []uint64, danglingNodes []int64, data *InMemoryStorage) []uint64 {

innerProduct := uint64(0)
for _, danglingNode := range danglingNodes {
innerProduct += prevrank[danglingNode]
}

innerProductOverSize := innerProduct / uint64(len(prevrank))
rank := append(make([]uint64, 0, len(prevrank)), prevrank...)
rank := append(make([]float64, 0, len(prevrank)), prevrank...)

var wg sync.WaitGroup
wg.Add(len(data.GetInLinks()))
Expand All @@ -76,32 +59,37 @@ func step(tOverSize uint64, prevrank []uint64, danglingNodes []int64, data *InMe

go func(cid CidNumber, inLinks CidLinks) {
defer wg.Done()
ksum := uint64(0)
ksum := float64(0)

//todo dependent on range iterator order, that non-deterministic
for j := range inLinks {
linkStake := data.GetOverallLinkStake(CidNumber(j), CidNumber(cid))
jCidOutStake := data.GetOverallOutLinksStake(CidNumber(j))
ksum += prevrank[j] / (jCidOutStake / linkStake)
weight := float64(linkStake) / float64(jCidOutStake)
ksum = float64(prevrank[j]*weight) + ksum //force no-fma here by explicit conversion
}

// 17/20 = 0.85 = d
rank[cid] = (ksum+innerProductOverSize)/20*17 + tOverSize
rank[cid] = float64(ksum*d) + defaultRankWithCorrection //force no-fma here by explicit conversion
}(i, inLinksForI)
}
wg.Wait()
return rank
}

// calculateChange returns the largest absolute element-wise difference
// between prevrank and rank.
//
// rank is indexed with the indices of prevrank, so it must be at least as
// long as prevrank. For an empty prevrank the result is 0.
func calculateChange(prevrank, rank []float64) float64 {
	maxDiff := 0.0
	for i, pForI := range prevrank {
		// Absolute difference, written out with a branch instead of math.Abs.
		var diff float64
		if pForI > rank[i] {
			diff = pForI - rank[i]
		} else {
			diff = rank[i] - pForI
		}
		if diff > maxDiff {
			maxDiff = diff
		}
	}
	return maxDiff
}
24 changes: 18 additions & 6 deletions cosmos/poc/app/storage/inmemory.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ import (

// RankedCidNumber pairs a cid number with the rank stored for that cid.
type RankedCidNumber struct {
	cidNumber CidNumber
	rank      float64
}

// RankedCid pairs a Cid with its Rank.
type RankedCid struct {
	Cid  Cid
	Rank float64
}

// CidRankedLinks is a slice of RankedCidNumber entries.
type CidRankedLinks []RankedCidNumber
Expand All @@ -34,13 +34,21 @@ type InMemoryStorage struct {
cidsCount uint64
cidsNumbersIndexes map[Cid]CidNumber
cidsByNumberIndex map[CidNumber]Cid
cidRank []uint64 // array index is cid number
cidRank []float64 // array index is cid number

cidRankedLinksIndex []CidRankedLinks

userStake map[AccountNumber]uint64
}

// Empty replaces every map-backed field of the storage (in/out links, both
// cid indexes and user stakes) with a fresh empty map. Only those five map
// fields are assigned here.
func (s *InMemoryStorage) Empty() {
	s.userStake = map[AccountNumber]uint64{}
	s.cidsByNumberIndex = map[CidNumber]Cid{}
	s.cidsNumbersIndexes = map[Cid]CidNumber{}
	s.outLinks = map[CidNumber]CidLinks{}
	s.inLinks = map[CidNumber]CidLinks{}
}

// Load from underlying persistent storage
// Heavy operation
func (s *InMemoryStorage) Load(ctx sdk.Context, ps CyberdPersistentStorages, am auth.AccountKeeper) {
Expand Down Expand Up @@ -77,6 +85,10 @@ func (s *InMemoryStorage) UpdateStake(acc sdk.AccAddress, stake int64) {
s.userStake[AccountNumber(acc.String())] += uint64(stake)
}

// UpdateStakeByNumber adds stake to the stake recorded for acc.
//
// The signed delta is converted to uint64 before the addition, so a negative
// stake wraps around and the unsigned add acts as a subtraction modulo 2^64.
func (s *InMemoryStorage) UpdateStakeByNumber(acc AccountNumber, stake int64) {
	delta := uint64(stake)
	s.userStake[acc] = s.userStake[acc] + delta
}

func (s *InMemoryStorage) AddLink(link LinkedCids) {

CidsLinks(s.outLinks).Put(link.FromCid, link.ToCid, link.Creator)
Expand Down Expand Up @@ -140,7 +152,7 @@ func (s *InMemoryStorage) GetCidRankedLinks(cid Cid, page, perPage int) ([]Ranke
return response, totalSize, nil
}

func (s *InMemoryStorage) UpdateRank(newCidRank []uint64) {
func (s *InMemoryStorage) UpdateRank(newCidRank []float64) {
s.cidRank = newCidRank
s.buildCidRankedLinksIndex()
}
Expand All @@ -158,7 +170,7 @@ func (s *InMemoryStorage) buildCidRankedLinksIndex() {
s.cidRankedLinksIndex = newIndex
}

func getLinksSortedByRank(cidOutLinks CidLinks, cidRank []uint64) CidRankedLinks {
func getLinksSortedByRank(cidOutLinks CidLinks, cidRank []float64) CidRankedLinks {
cidRankedLinks := make(CidRankedLinks, 0, len(cidOutLinks))
for linkedCidNumber := range cidOutLinks {
rankedCid := RankedCidNumber{cidNumber: linkedCidNumber, rank: cidRank[linkedCidNumber]}
Expand All @@ -170,7 +182,7 @@ func getLinksSortedByRank(cidOutLinks CidLinks, cidRank []uint64) CidRankedLinks

//
// GETTERS
func (s *InMemoryStorage) GetRank() []uint64 {
func (s *InMemoryStorage) GetRank() []float64 {
return s.cidRank
}

Expand Down
25 changes: 13 additions & 12 deletions cosmos/poc/cuda/README.md
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
# Cuda support

## Install required libs
```bash

```

## Development
To execute cuda code run.
To run the GPU and CPU rank computations, execute:

```bash
nvcc -shared -o librank.so rank.cu --compiler-options '-fPIC -frounding-math -fsignaling-nans' && sudo cp librank.so /usr/lib/
nvcc -fmad=false -shared -o librank.so rank.cu --compiler-options '-fPIC -frounding-math -fsignaling-nans'
sudo cp librank.so /usr/lib/
go run *.go
```
After it finishes, compare the GPU and CPU ranks; they should match.


https://gcc.gnu.org/wiki/FloatingPointMath

While creating the shared libraries, position independent code should be produced. This helps the shared library
to get loaded as any address instead of some fixed address. For this -fPIC option is used.

'-frounding-math' is round-to-zero for all floating point to integer conversions, and round-to-nearest for all other arithmetic truncations.
to be loaded at any address instead of some fixed address. The `-fPIC` option is used for this.

## Determinism

FMA (fused multiply-add) is disabled in the current version.

https://gcc.gnu.org/wiki/FloatingPointMath
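`-frounding-math` tells GCC not to assume the default rounding behavior (round-to-zero for all floating point to integer conversions, and round-to-nearest for
all other arithmetic truncations), so optimizations that rely on that assumption are disabled.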


19 changes: 5 additions & 14 deletions cosmos/poc/cuda/rank.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ void run_rank_iteration(

double ksum = 0;
for (uint64_t j = inLinksStartIndex[i]; j < inLinksStartIndex[i] + inLinksCount[i]; j++) {
// ksum = prevRank[inLinks[j].fromIndex] * inLinks[j].weight + ksum
ksum = __fmaf_rn(prevRank[inLinks[j].fromIndex], inLinks[j].weight, ksum);
ksum = prevRank[inLinks[j].fromIndex] * inLinks[j].weight + ksum;
//ksum = __fmaf_rn(prevRank[inLinks[j].fromIndex], inLinks[j].weight, ksum);
}
// rank[i] = ksum * DUMP_FACTOR + defaultRank
rank[i] = __fmaf_rn(ksum, DUMP_FACTOR, defaultRankWithCorrection); // ksum * DUMP_FACTOR + defaultRank
rank[i] = ksum * DUMP_FACTOR + defaultRankWithCorrection;
//rank[i] = __fmaf_rn(ksum, DUMP_FACTOR, defaultRankWithCorrection);
}
}

Expand Down Expand Up @@ -226,11 +226,8 @@ extern "C" {
double *rank /* array index - cid index*/
) {

printf("Cuda...\n");

// STEP1: Calculate for each cid total stake by out links
/*-------------------------------------------------------------------*/
printf("Cuda step 1.\n");
uint64_t *d_outLinksStartIndex;
uint32_t *d_outLinksCount;
uint64_t *d_outLinksUsers;
Expand Down Expand Up @@ -262,7 +259,6 @@ extern "C" {

// STEP2: Calculate compressed in links count
/*-------------------------------------------------------------------*/
printf("Cuda step 2.\n");
uint64_t *d_inLinksStartIndex;
uint32_t *d_inLinksCount;
uint64_t *d_inLinksOuts;
Expand All @@ -287,7 +283,6 @@ extern "C" {

// STEP3: Calculate compressed in links start indexes
/*-------------------------------------------------------------------*/
printf("Cuda step 3.\n");
uint32_t *compressedInLinksCount = (uint32_t*) malloc(cidsSize*sizeof(uint32_t));
uint64_t *compressedInLinksStartIndex = (uint64_t*) malloc(cidsSize*sizeof(uint64_t));
cudaMemcpy(compressedInLinksCount, d_compressedInLinksCount, cidsSize * sizeof(uint32_t), cudaMemcpyDeviceToHost);
Expand All @@ -306,7 +301,6 @@ extern "C" {

// STEP4: Calculate compressed in links
/*-------------------------------------------------------------------*/
printf("Cuda step 4.\n");
uint64_t *d_inLinksUsers;
CompressedInLink *d_compressedInLinks; //calculated

Expand Down Expand Up @@ -334,7 +328,6 @@ extern "C" {

// STEP5: Calculate dangling nodes rank, and default rank
/*-------------------------------------------------------------------*/
printf("Cuda step 5.\n");
double defaultRank = (1.0 - DUMP_FACTOR) / cidsSize;
uint64_t danglingNodesSize = 0;
for(uint64_t i=0; i< cidsSize; i++){
Expand All @@ -344,7 +337,7 @@ extern "C" {
}
}

double innerProductOverSize = defaultRank * ( danglingNodesSize / cidsSize);
double innerProductOverSize = defaultRank * ((double) danglingNodesSize / (double)cidsSize);
double defaultRankWithCorrection = (DUMP_FACTOR * innerProductOverSize) + defaultRank; //fma point
/*-------------------------------------------------------------------*/

Expand All @@ -353,8 +346,6 @@ extern "C" {

// STEP6: Calculate rank
/*-------------------------------------------------------------------*/
printf("Calculating rank\n");

double *d_rank, *d_prevRank;

cudaMalloc(&d_rank, cidsSize*sizeof(double));
Expand Down
Loading

0 comments on commit a8e8eb9

Please sign in to comment.