Skip to content

Commit

Permalink
Merge pull request #2080 from psychocrypt/topic-reduceSharedMemUsage
Browse files Browse the repository at this point in the history
OpenCL: reduce local mem footprint
  • Loading branch information
fireice-uk authored Nov 20, 2018
2 parents a7e30eb + 6f28392 commit 2683009
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 31 deletions.
8 changes: 3 additions & 5 deletions xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
ulong b[2];
uint4 b_x[1];
#endif
__local uint AES0[256], AES1[256], AES2[256], AES3[256];
__local uint AES0[256], AES1[256];

// cryptonight_monero_v8
#if(ALGO==11)
Expand All @@ -577,8 +577,6 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
const uint tmp = AES0_C[i];
AES0[i] = tmp;
AES1[i] = rotate(tmp, 8U);
AES2[i] = rotate(tmp, 16U);
AES3[i] = rotate(tmp, 24U);
// cryptonight_monero_v8
#if(ALGO==11)
RCP[i] = RCP_C[i];
Expand Down Expand Up @@ -653,9 +651,9 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
((uint4 *)c)[0] = SCRATCHPAD_CHUNK(0);
// cryptonight_bittube2
#if(ALGO == 10)
((uint4 *)c)[0] = AES_Round_bittube2(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]);
((uint4 *)c)[0] = AES_Round2(AES0, AES1, ~((uint4 *)c)[0], ((uint4 *)a)[0]);
#else
((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]);
((uint4 *)c)[0] = AES_Round2(AES0, AES1, ((uint4 *)c)[0], ((uint4 *)a)[0]);
#endif

// cryptonight_monero_v8
Expand Down
59 changes: 33 additions & 26 deletions xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl
Original file line number Diff line number Diff line change
Expand Up @@ -74,42 +74,49 @@ static const __constant uint AES0_C[256] =

// BYTE(x, y): selects one 8-bit field of x by calling amd_bfe with a bit
// offset of y * 8 (written as y << 3) and a width of 8. With y in 0..3 this
// addresses the four bytes of a 32-bit word, lowest byte first.
// NOTE(review): amd_bfe is presumably the AMD bit-field-extract built-in from
// the cl_amd_media_ops2 extension - confirm the extension is enabled where this
// file is compiled.
#define BYTE(x, y) (amd_bfe((x), (y) << 3U, 8U))

/*
 * Table-driven round function for the bittube2 variant.
 *
 * AES0..AES3: 256-entry lookup tables in local memory.
 * x:          input state; it is bitwise-complemented before use.
 * k:          round key; each lane is XORed with four table entries and the
 *             updated vector is returned.
 *
 * Unlike a plain round, every freshly computed lane of k is folded back into
 * the matching lane of x before the next lane is processed, so the lookups
 * for later lanes depend on earlier results. The statement order below is
 * therefore significant.
 */
inline uint4 AES_Round_bittube2(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, uint4 x, uint4 k)
{
	x = ~x;

	// Lane 0: uses only the complemented input.
	const uint mix0 = AES0[BYTE(x.s0, 0)] ^ AES1[BYTE(x.s1, 1)] ^ AES2[BYTE(x.s2, 2)] ^ AES3[BYTE(x.s3, 3)];
	k.s0 ^= mix0;
	x.s0 ^= k.s0;

	// Lane 1: byte 3 comes from the already updated x.s0.
	const uint mix1 = AES0[BYTE(x.s1, 0)] ^ AES1[BYTE(x.s2, 1)] ^ AES2[BYTE(x.s3, 2)] ^ AES3[BYTE(x.s0, 3)];
	k.s1 ^= mix1;
	x.s1 ^= k.s1;

	// Lane 2: bytes 2 and 3 come from the updated x.s0 and x.s1.
	const uint mix2 = AES0[BYTE(x.s2, 0)] ^ AES1[BYTE(x.s3, 1)] ^ AES2[BYTE(x.s0, 2)] ^ AES3[BYTE(x.s1, 3)];
	k.s2 ^= mix2;
	x.s2 ^= k.s2;

	// Lane 3: x.s3 is never written back because nothing reads it afterwards.
	const uint mix3 = AES0[BYTE(x.s3, 0)] ^ AES1[BYTE(x.s0, 1)] ^ AES2[BYTE(x.s1, 2)] ^ AES3[BYTE(x.s2, 3)];
	k.s3 ^= mix3;

	return k;
}

/*
 * Table-driven round function using four 256-entry local-memory tables.
 *
 * AES0..AES3: lookup tables; table N is indexed with byte N of a lane of X.
 * X:          input state (read-only).
 * key:        round key; each lane is XORed with one entry from every table
 *             and the resulting vector is returned.
 *
 * For output lane i the lookups use byte 0 of X lane i, byte 1 of lane i+1,
 * byte 2 of lane i+2 and byte 3 of lane i+3 (lane indices modulo 4).
 *
 * Each XOR must be applied exactly once: applying the same lookup twice to a
 * lane cancels it out and leaves that lane of the key unmodified.
 */
uint4 AES_Round(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, const uint4 X, uint4 key)
{
	// Byte 0 of the same lane.
	key.s0 ^= AES0[BYTE(X.s0, 0)];
	key.s1 ^= AES0[BYTE(X.s1, 0)];
	key.s2 ^= AES0[BYTE(X.s2, 0)];
	key.s3 ^= AES0[BYTE(X.s3, 0)];

	// Byte 2 of the lane two positions ahead.
	key.s0 ^= AES2[BYTE(X.s2, 2)];
	key.s1 ^= AES2[BYTE(X.s3, 2)];
	key.s2 ^= AES2[BYTE(X.s0, 2)];
	key.s3 ^= AES2[BYTE(X.s1, 2)];

	// Byte 1 of the next lane.
	key.s0 ^= AES1[BYTE(X.s1, 1)];
	key.s1 ^= AES1[BYTE(X.s2, 1)];
	key.s2 ^= AES1[BYTE(X.s3, 1)];
	key.s3 ^= AES1[BYTE(X.s0, 1)];

	// Byte 3 of the lane three positions ahead.
	key.s0 ^= AES3[BYTE(X.s3, 3)];
	key.s1 ^= AES3[BYTE(X.s0, 3)];
	key.s2 ^= AES3[BYTE(X.s1, 3)];
	key.s3 ^= AES3[BYTE(X.s2, 3)];

	return key;
}

/*
 * Two-table variant of AES_Round that needs only AES0 and AES1 in local
 * memory.
 *
 * AES0, AES1: 256-entry lookup tables.
 * X:          input state (read-only).
 * key:        round key; the updated vector is returned.
 *
 * The byte-2 and byte-3 contributions are looked up in AES0 and AES1
 * respectively, XORed together and rotated left by 16 bits. Because rotate
 * distributes over XOR, this yields the same result as AES_Round whenever
 * AES2[i] == rotate(AES0[i], 16) and AES3[i] == rotate(AES1[i], 16).
 */
uint4 AES_Round2(const __local uint *AES0, const __local uint *AES1, const uint4 X, uint4 key)
{
	// Byte 0 of the same lane, straight from AES0.
	key.s0 ^= AES0[BYTE(X.s0, 0)];
	key.s1 ^= AES0[BYTE(X.s1, 0)];
	key.s2 ^= AES0[BYTE(X.s2, 0)];
	key.s3 ^= AES0[BYTE(X.s3, 0)];

	// Bytes 2 and 3: one combined 16-bit rotate replaces the AES2/AES3 tables.
	key.s0 ^= rotate(AES0[BYTE(X.s2, 2)] ^ AES1[BYTE(X.s3, 3)], 16u);
	key.s1 ^= rotate(AES0[BYTE(X.s3, 2)] ^ AES1[BYTE(X.s0, 3)], 16u);
	key.s2 ^= rotate(AES0[BYTE(X.s0, 2)] ^ AES1[BYTE(X.s1, 3)], 16u);
	key.s3 ^= rotate(AES0[BYTE(X.s1, 2)] ^ AES1[BYTE(X.s2, 3)], 16u);

	// Byte 1 of the next lane, straight from AES1.
	key.s0 ^= AES1[BYTE(X.s1, 1)];
	key.s1 ^= AES1[BYTE(X.s2, 1)];
	key.s2 ^= AES1[BYTE(X.s3, 1)];
	key.s3 ^= AES1[BYTE(X.s0, 1)];

	return key;
}

#endif
Expand Down

0 comments on commit 2683009

Please sign in to comment.