Skip to content

Commit

Permalink
Merge pull request fireice-uk#2084 from psychocrypt/topic-amd32bit
Browse files: browse the repository at this point in the history
AMD: use more 32bit operations
  • Loading branch information
fireice-uk authored Nov 21, 2018
2 parents de33fa3 + 58b922a commit 4065f9e
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 15 deletions.
9 changes: 4 additions & 5 deletions xmrstak/backend/amd/amd_gpu/gpu.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -43,7 +43,6 @@
#endif



#ifdef _WIN32
#include <windows.h>
#include <Shlobj.h>
Expand Down Expand Up @@ -412,12 +411,12 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_

std::string options;
options += " -DITERATIONS=" + std::to_string(hashIterations);
options += " -DMASK=" + std::to_string(threadMemMask);
options += " -DWORKSIZE=" + std::to_string(ctx->workSize);
options += " -DMASK=" + std::to_string(threadMemMask) + "U";
options += " -DWORKSIZE=" + std::to_string(ctx->workSize) + "U";
options += " -DSTRIDED_INDEX=" + std::to_string(strided_index);
options += " -DMEM_CHUNK_EXPONENT=" + std::to_string(mem_chunk_exp);
options += " -DMEM_CHUNK_EXPONENT=" + std::to_string(mem_chunk_exp) + "U";
options += " -DCOMP_MODE=" + std::to_string(ctx->compMode ? 1u : 0u);
options += " -DMEMORY=" + std::to_string(hashMemSize);
options += " -DMEMORY=" + std::to_string(hashMemSize) + "LLU";
options += " -DALGO=" + std::to_string(miner_algo[ii]);
options += " -DCN_UNROLL=" + std::to_string(ctx->unroll);
/* AMD driver output is something like: `1445.5 (VM)`
Expand Down
20 changes: 10 additions & 10 deletions xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
Original file line number | Diff line number | Diff line change
Expand Up @@ -636,15 +636,15 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
if(gIdx < Threads)
#endif
{
ulong idx0 = a[0] & MASK;
uint idx0 = as_uint2(a[0]).s0 & MASK;

#pragma unroll CN_UNROLL
for(int i = 0; i < ITERATIONS; ++i)
{
ulong c[2];
// cryptonight_monero_v8 && NVIDIA
#if(ALGO==11 && defined(__NV_CL_C_VERSION))
ulong idxS = idx0 & 0x30;
uint idxS = idx0 & 0x30U;
*scratchpad_line = SCRATCHPAD_CHUNK_GLOBAL;
#endif

Expand Down Expand Up @@ -680,23 +680,23 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
# endif
b_x[0].s2 ^= ((table >> index) & 0x30U) << 24;
SCRATCHPAD_CHUNK(0) = b_x[0];
idx0 = c[0] & MASK;
idx0 = as_uint2(c[0]).s0 & MASK;
// cryptonight_monero_v8
#elif(ALGO==11)
SCRATCHPAD_CHUNK(0) = b_x[0] ^ ((uint4 *)c)[0];
# ifdef __NV_CL_C_VERSION
// flush shuffled data
SCRATCHPAD_CHUNK_GLOBAL = *scratchpad_line;
idx0 = c[0] & MASK;
idx0 = as_uint2(c[0]).s0 & MASK;
idxS = idx0 & 0x30;
*scratchpad_line = SCRATCHPAD_CHUNK_GLOBAL;
# else
idx0 = c[0] & MASK;
idx0 = as_uint2(c[0]).s0 & MASK;
# endif
#else
b_x[0] ^= ((uint4 *)c)[0];
SCRATCHPAD_CHUNK(0) = b_x[0];
idx0 = c[0] & MASK;
idx0 = as_uint2(c[0]).s0 & MASK;
#endif
uint4 tmp;
tmp = SCRATCHPAD_CHUNK(0);
Expand Down Expand Up @@ -763,22 +763,22 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
b_x[1] = b_x[0];
#endif
b_x[0] = ((uint4 *)c)[0];
idx0 = a[0] & MASK;
idx0 = as_uint2(a[0]).s0 & MASK;

// cryptonight_heavy || cryptonight_bittube2
#if (ALGO == 4 || ALGO == 10)
long n = *((__global long*)(Scratchpad + (IDX((idx0) >> 4))));
int d = ((__global int*)(Scratchpad + (IDX((idx0) >> 4))))[2];
long q = fast_div_heavy(n, d | 0x5);
*((__global long*)(Scratchpad + (IDX((idx0) >> 4)))) = n ^ q;
idx0 = (d ^ q) & MASK;
idx0 = (d ^ as_int2(q).s0) & MASK;
// cryptonight_haven
#elif (ALGO == 9)
long n = *((__global long*)(Scratchpad + (IDX((idx0) >> 4))));
int d = ((__global int*)(Scratchpad + (IDX((idx0) >> 4))))[2];
long q = fast_div_heavy(n, d | 0x5);
*((__global long*)(Scratchpad + (IDX((idx0) >> 4)))) = n ^ q;
idx0 = ((~d) ^ q) & MASK;
idx0 = ((~d) ^ as_int2(q).s0) & MASK;
#endif

}
Expand Down Expand Up @@ -1236,4 +1236,4 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global
}
}

)==="
)==="

0 comments on commit 4065f9e

Please sign in to comment.