diff --git a/ErgoOpenCL.vcxproj b/ErgoOpenCL.vcxproj index f3a200c..96edff3 100755 --- a/ErgoOpenCL.vcxproj +++ b/ErgoOpenCL.vcxproj @@ -22,7 +22,7 @@ 15.0 {19C0E2B9-63F8-4C92-B426-8D0F7E4248BC} ErgoOpenCL - 8.1 + 10.0.17763.0 @@ -41,13 +41,13 @@ Application true - v140 + v141 MultiByte Application false - v140 + v141 true MultiByte @@ -70,10 +70,10 @@ - C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;E:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath) + C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath) - C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;E:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath) + C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath) @@ -93,7 +93,7 @@ WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - C:\OpenSSL-Win64\lib;E:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories) + C:\OpenSSL-Win64\lib;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories) OpenCL.lib;libcurl.lib;libeay32.lib;%(AdditionalDependencies) @@ -126,7 +126,7 @@ true true OpenCL.lib;libcurl.lib;libeay32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - C:\OpenSSL-Win64\lib;E:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories) + C:\OpenSSL-Win64\lib;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories) diff --git a/MiningKernel.cl b/MiningKernel.cl index b41ed47..2d14195 
100755 --- a/MiningKernel.cl +++ b/MiningKernel.cl @@ -68,10 +68,10 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base, ((cl_ulong *)(aux))[14] = ~((cl_ulong *)(aux))[14]; - ((cl_ulong *)(aux))[16] = ((cl_ulong *)data)[0]; - ((cl_ulong *)(aux))[17] = ((cl_ulong *)data)[1]; - ((cl_ulong *)(aux))[18] = ((cl_ulong *)data)[2]; - ((cl_ulong *)(aux))[19] = ((cl_ulong *)data)[3]; + ((cl_ulong *)(aux))[16] = ((global cl_ulong *)data)[0]; + ((cl_ulong *)(aux))[17] = ((global cl_ulong *)data)[1]; + ((cl_ulong *)(aux))[18] = ((global cl_ulong *)data)[2]; + ((cl_ulong *)(aux))[19] = ((global cl_ulong *)data)[3]; ((cl_ulong *)(aux))[20] = tmp; ((cl_ulong *)(aux))[21] = 0; ((cl_ulong *)(aux))[22] = 0; @@ -114,10 +114,12 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base, h3 = h2 % N_LEN; //--------------------------read hash from lookup + cl_uint tmpL; #pragma unroll 8 for (int i = 0; i < 8; ++i) { - reverseBytesInt(hashes[(h3 << 3) + i], r[7 - i]); + tmpL = hashes[(h3 << 3) + i]; + reverseBytesInt(tmpL, r[7 - i]); } //------------------------------------------------------ @@ -136,7 +138,7 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base, bT[j] = ((uint8_t *)r)[j + 1]; #pragma unroll for (j = 31; j < 63; ++j) - bT[j] = ((uint8_t *)data)[j - 31]; + bT[j] = ((global uint8_t *)data)[j - 31]; #pragma unroll for (j = 63; j < 71; ++j) bT[j] = ((uint8_t *)&tmp)[j - 63]; diff --git a/PreHashKernel.cl b/PreHashKernel.cl index e57b5bb..2ccd1a4 100755 --- a/PreHashKernel.cl +++ b/PreHashKernel.cl @@ -1,155 +1,174 @@ #include "OCLdecs.h"////problem with relative path + +__constant uint8_t blake2b_sigma[12][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 
6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + +#pragma OPENCL EXTENSION cl_amd_media_ops : enable +#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable + +ulong FAST_ROTL64_LO(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x, x.s10, 32 - y))); } +ulong FAST_ROTL64_HI(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x.s10, x, 32 - (y - 32)))); } + + +//#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +ulong rotr64(const ulong x2, const uint y) +{ + uint2 x = as_uint2(x2); + if (y < 32) return(as_ulong(amd_bitalign(x.s10, x, y))); + else return(as_ulong(amd_bitalign(x, x.s10, (y - 32)))); +} + +#define G(m, r, i, a, b, c, d) do { \ + a += b + ((ulong *)m)[blake2b_sigma[r][i]]; \ + d = rotr64(d ^ a, 32);\ + c += d; \ + b = rotr64(b ^ c, 24); \ + a += b + ((ulong *)m)[blake2b_sigma[r][i + 1]]; \ + d = rotr64(d ^ a, 16); \ + c += d; \ + b = rotr64(b ^ c, 63); \ +} while(0) + + +#define BLAKE2B_RND(v, r, m) do { \ + G(m, r, 0, v[ 0], v[ 4], v[ 8], v[12]); \ + G(m, r, 2, v[ 1], v[ 5], v[ 9], v[13]); \ + G(m, r, 4, v[ 2], v[ 6], v[10], v[14]); \ + G(m, r, 6, v[ 3], v[ 7], v[11], v[15]); \ + G(m, r, 8, v[ 0], v[ 5], v[10], v[15]); \ + G(m, r, 10, v[ 1], v[ 6], v[11], v[12]); \ + G(m, r, 12, v[ 2], v[ 7], v[ 8], v[13]); \ + G(m, r, 14, v[ 3], v[ 4], v[ 9], v[14]); \ +} while(0) + + +inline void BlakeCompress(ulong *h, const ulong *m, ulong t, ulong f) +{ + ulong v[16]; + + ((ulong8 *)v)[0] = ((ulong8 *)h)[0]; + ((ulong8 *)v)[1] = (ulong8)(0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, 0x510E527FADE682D1UL, 
0x9B05688C2B3E6C1FUL, 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL); + + v[12] ^= t; + v[14] ^= f; + +#pragma unroll + for (int rnd = 0; rnd < 12; ++rnd) + { + BLAKE2B_RND(v, rnd, m); + } + + h[0] ^= v[0] ^ v[0 + 8]; + h[1] ^= v[1] ^ v[1 + 8]; + h[2] ^= v[2] ^ v[2 + 8]; + h[3] ^= v[3] ^ v[3 + 8]; + h[4] ^= v[4] ^ v[4 + 8]; + h[5] ^= v[5] ^ v[5 + 8]; + h[6] ^= v[6] ^ v[6 + 8]; + h[7] ^= v[7] ^ v[7 + 8]; +} + + //////////////////////////////////////////////////////////////////////////////// // First iteration of hashes precalculation //////////////////////////////////////////////////////////////////////////////// __kernel void InitPrehash( - // data: height - const cl_uint h, - // hashes - global cl_uint * hashes + // data: height + const cl_uint h, + // hashes + global cl_uint * hashes ) { - cl_uint tid =get_local_id(0); + cl_uint tid = get_local_id(0); - // shared memory - cl_uint height = h; + // shared memory + cl_uint height = h; + tid = get_global_id(0); + if (tid < N_LEN) + { + ulong h[8]; + ulong b[16]; + ulong t = 0; + //====================================================================// + // Initialize context + //====================================================================// + ((ulong8 *)h)[0] = (ulong8)(0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL); - tid = get_global_id(0); + h[0] ^= 0x01010020; - if (tid < N_LEN) - { + //====================================================================// + // Hash tid + //====================================================================// - cl_uint j; + ((uint *)b)[0] = as_uint(as_uchar4(tid).s3210); + //====================================================================// + // Hash height + //====================================================================// + ((uint *)b)[1] = height; - // local memory - // 472 bytes - cl_uint ldata[118]; + 
//====================================================================// + // Hash constant message + //====================================================================// - // 32 * 64 bits = 256 bytes - cl_ulong * aux = (cl_ulong *)ldata; - // (212 + 4) bytes - ctx_t * ctx = (ctx_t *)(ldata + 64); + ulong ctr = 0; + for (int x = 1; x < 16; ++x, ++ctr) + { + ((ulong *)b)[x] = as_ulong(as_uchar8(ctr).s76543210); + } - //====================================================================// - // Initialize context - //====================================================================// - //memset(ctx->b, 0, BUF_SIZE_8); - #pragma unroll - for (int am = 0; am < BUF_SIZE_8; am++) +#pragma unroll 1 + for (int z = 0; z < 63; ++z) + { + t += 128; + BlakeCompress((ulong *)h, (ulong *)b, t, 0UL); + +#pragma unroll + for (int x = 0; x < 16; ++x, ++ctr) { - ctx->b[am] = 0; + ((ulong *)b)[x] = as_ulong(as_uchar8(ctr).s76543210); } - B2B_IV(ctx->h); - + } + t += 128; + BlakeCompress((ulong *)h, (ulong *)b, t, 0UL); - ctx->h[0] ^= 0x01010000 ^ NUM_SIZE_8; - //memset(ctx->t, 0, 16); - ctx->t[0] = 0; - ctx->t[1] = 0; - ctx->c = 0; + ((ulong *)b)[0] = as_ulong(as_uchar8(ctr).s76543210); + t += 8; - - //====================================================================// - // Hash tid - //====================================================================// -#pragma unroll - for (j = 0; ctx->c < BUF_SIZE_8 && j < INDEX_SIZE_8; ++j) - { - ctx->b[ctx->c++] = ((const uint8_t *)&tid)[INDEX_SIZE_8 - j - 1]; - } - - //====================================================================// - // Hash height - //====================================================================// -#pragma unroll - for (j = 0; ctx->c < BUF_SIZE_8 && j < HEIGHT_SIZE ; ++j) - { - ctx->b[ctx->c++] = ((const uint8_t *)&height)[j/*HEIGHT_SIZE - j - 1*/]; - } - - //====================================================================// - // Hash constant message - 
//====================================================================// #pragma unroll - for (j = 0; ctx->c < BUF_SIZE_8 && j < CONST_MES_SIZE_8; ++j) - { - ctx->b[ctx->c++] - = ( - !((7 - (j & 7)) >> 1) - * ((j >> 3) >> (((~(j & 7)) & 1) << 3)) - ) & 0xFF; - } - - - while (j < CONST_MES_SIZE_8) - { - HOST_B2B_H(ctx, aux); - - for ( ; ctx->c < BUF_SIZE_8 && j < CONST_MES_SIZE_8; ++j) - { - ctx->b[ctx->c++] - = ( - !((7 - (j & 7)) >> 1) - * ((j >> 3) >> (((~(j & 7)) & 1) << 3)) - ) & 0xFF; - } - } - - - //====================================================================// - // Finalize hash - //====================================================================// - HOST_B2B_H_LAST(ctx, aux); -#pragma unroll - for (j = 0; j < NUM_SIZE_8; ++j) - { - ((uint8_t *)ldata)[NUM_SIZE_8 - j - 1] - = (ctx->h[j >> 3] >> ((j & 7) << 3)) & 0xFF; - } + for (int i = 1; i < 16; ++i) ((ulong *)b)[i] = 0UL; + BlakeCompress((ulong *)h, (ulong *)b, t, 0xFFFFFFFFFFFFFFFFUL); - //====================================================================// - // Dump result to global memory -- BIG ENDIAN - //====================================================================// + //====================================================================// + // Dump result to global memory -- BIG ENDIAN + //====================================================================// #pragma unroll - for (int i = 0; i < NUM_SIZE_8; ++i) - { - ((uint8_t __global *)hashes)[(tid + 1) * NUM_SIZE_8 - i - 1] - = ((uint8_t *)ldata)[i]; - } - - - j = ((cl_ulong *)ldata)[3] < Q3 - || ((cl_ulong *)ldata)[3] == Q3 && ( - ((cl_ulong *)ldata)[2] < Q2 - || ((cl_ulong *)ldata)[2] == Q2 && ( - ((cl_ulong *)ldata)[1] < Q1 - || ((cl_ulong *)ldata)[1] == Q1 - && ((cl_ulong *)ldata)[0] < Q0 - ) - ); - - - - #pragma unroll - for (int i = 0; i < NUM_SIZE_8-1; ++i) - { - //((uint8_t global*)hashes)[(tid + 1) * NUM_SIZE_8 - i -1] = ((uint8_t *)ldata)[i]; - ((uint8_t global*)hashes)[tid * NUM_SIZE_8 +i ] = ((uint8_t 
*)ldata)[i]; - } - // drop - ((uint8_t global*)hashes)[tid * NUM_SIZE_8 +31 ] = 0; + for (int i = 0; i < 4; ++i) ((__global ulong *)hashes)[(tid + 1) * 4 - i - 1] = as_ulong(as_uchar8(h[i]).s76543210); + + ((__global uchar *)hashes)[tid * 32 + 31] = 0; } - return; + return; } diff --git a/Ubuntu/ErgoOpenCL b/Ubuntu/ErgoOpenCL deleted file mode 100755 index beff66b..0000000 Binary files a/Ubuntu/ErgoOpenCL and /dev/null differ diff --git a/Ubuntu/MiningKernel.cl b/Ubuntu/MiningKernel.cl index b41ed47..2d14195 100755 --- a/Ubuntu/MiningKernel.cl +++ b/Ubuntu/MiningKernel.cl @@ -68,10 +68,10 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base, ((cl_ulong *)(aux))[14] = ~((cl_ulong *)(aux))[14]; - ((cl_ulong *)(aux))[16] = ((cl_ulong *)data)[0]; - ((cl_ulong *)(aux))[17] = ((cl_ulong *)data)[1]; - ((cl_ulong *)(aux))[18] = ((cl_ulong *)data)[2]; - ((cl_ulong *)(aux))[19] = ((cl_ulong *)data)[3]; + ((cl_ulong *)(aux))[16] = ((global cl_ulong *)data)[0]; + ((cl_ulong *)(aux))[17] = ((global cl_ulong *)data)[1]; + ((cl_ulong *)(aux))[18] = ((global cl_ulong *)data)[2]; + ((cl_ulong *)(aux))[19] = ((global cl_ulong *)data)[3]; ((cl_ulong *)(aux))[20] = tmp; ((cl_ulong *)(aux))[21] = 0; ((cl_ulong *)(aux))[22] = 0; @@ -114,10 +114,12 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base, h3 = h2 % N_LEN; //--------------------------read hash from lookup + cl_uint tmpL; #pragma unroll 8 for (int i = 0; i < 8; ++i) { - reverseBytesInt(hashes[(h3 << 3) + i], r[7 - i]); + tmpL = hashes[(h3 << 3) + i]; + reverseBytesInt(tmpL, r[7 - i]); } //------------------------------------------------------ @@ -136,7 +138,7 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base, bT[j] = ((uint8_t *)r)[j + 1]; #pragma unroll for (j = 31; j < 63; ++j) - bT[j] = ((uint8_t *)data)[j - 31]; + bT[j] = ((global uint8_t *)data)[j - 31]; #pragma unroll for (j = 63; j < 71; ++j) bT[j] = ((uint8_t *)&tmp)[j - 63]; diff 
--git a/Ubuntu/PreHashKernel.cl b/Ubuntu/PreHashKernel.cl index e57b5bb..2ccd1a4 100755 --- a/Ubuntu/PreHashKernel.cl +++ b/Ubuntu/PreHashKernel.cl @@ -1,155 +1,174 @@ #include "OCLdecs.h"////problem with relative path + +__constant uint8_t blake2b_sigma[12][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + +#pragma OPENCL EXTENSION cl_amd_media_ops : enable +#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable + +ulong FAST_ROTL64_LO(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x, x.s10, 32 - y))); } +ulong FAST_ROTL64_HI(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x.s10, x, 32 - (y - 32)))); } + + +//#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +ulong rotr64(const ulong x2, const uint y) +{ + uint2 x = as_uint2(x2); + if (y < 32) return(as_ulong(amd_bitalign(x.s10, x, y))); + else return(as_ulong(amd_bitalign(x, x.s10, (y - 32)))); +} + +#define G(m, r, i, a, b, c, d) do { \ + a += b + ((ulong *)m)[blake2b_sigma[r][i]]; \ + d = rotr64(d ^ a, 32);\ + c += d; \ + b = rotr64(b ^ c, 24); \ + a += b + ((ulong *)m)[blake2b_sigma[r][i + 1]]; \ + d = rotr64(d ^ a, 16); \ + c += d; \ + b = rotr64(b ^ c, 63); \ +} while(0) + + +#define BLAKE2B_RND(v, r, m) do { \ + G(m, r, 0, v[ 0], v[ 4], v[ 8], v[12]); \ + G(m, r, 2, v[ 1], v[ 5], v[ 9], v[13]); \ 
+ G(m, r, 4, v[ 2], v[ 6], v[10], v[14]); \ + G(m, r, 6, v[ 3], v[ 7], v[11], v[15]); \ + G(m, r, 8, v[ 0], v[ 5], v[10], v[15]); \ + G(m, r, 10, v[ 1], v[ 6], v[11], v[12]); \ + G(m, r, 12, v[ 2], v[ 7], v[ 8], v[13]); \ + G(m, r, 14, v[ 3], v[ 4], v[ 9], v[14]); \ +} while(0) + + +inline void BlakeCompress(ulong *h, const ulong *m, ulong t, ulong f) +{ + ulong v[16]; + + ((ulong8 *)v)[0] = ((ulong8 *)h)[0]; + ((ulong8 *)v)[1] = (ulong8)(0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL); + + v[12] ^= t; + v[14] ^= f; + +#pragma unroll + for (int rnd = 0; rnd < 12; ++rnd) + { + BLAKE2B_RND(v, rnd, m); + } + + h[0] ^= v[0] ^ v[0 + 8]; + h[1] ^= v[1] ^ v[1 + 8]; + h[2] ^= v[2] ^ v[2 + 8]; + h[3] ^= v[3] ^ v[3 + 8]; + h[4] ^= v[4] ^ v[4 + 8]; + h[5] ^= v[5] ^ v[5 + 8]; + h[6] ^= v[6] ^ v[6 + 8]; + h[7] ^= v[7] ^ v[7 + 8]; +} + + //////////////////////////////////////////////////////////////////////////////// // First iteration of hashes precalculation //////////////////////////////////////////////////////////////////////////////// __kernel void InitPrehash( - // data: height - const cl_uint h, - // hashes - global cl_uint * hashes + // data: height + const cl_uint h, + // hashes + global cl_uint * hashes ) { - cl_uint tid =get_local_id(0); + cl_uint tid = get_local_id(0); - // shared memory - cl_uint height = h; + // shared memory + cl_uint height = h; + tid = get_global_id(0); + if (tid < N_LEN) + { + ulong h[8]; + ulong b[16]; + ulong t = 0; + //====================================================================// + // Initialize context + //====================================================================// + ((ulong8 *)h)[0] = (ulong8)(0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL); - tid = get_global_id(0); + 
h[0] ^= 0x01010020; - if (tid < N_LEN) - { + //====================================================================// + // Hash tid + //====================================================================// - cl_uint j; + ((uint *)b)[0] = as_uint(as_uchar4(tid).s3210); + //====================================================================// + // Hash height + //====================================================================// + ((uint *)b)[1] = height; - // local memory - // 472 bytes - cl_uint ldata[118]; + //====================================================================// + // Hash constant message + //====================================================================// - // 32 * 64 bits = 256 bytes - cl_ulong * aux = (cl_ulong *)ldata; - // (212 + 4) bytes - ctx_t * ctx = (ctx_t *)(ldata + 64); + ulong ctr = 0; + for (int x = 1; x < 16; ++x, ++ctr) + { + ((ulong *)b)[x] = as_ulong(as_uchar8(ctr).s76543210); + } - //====================================================================// - // Initialize context - //====================================================================// - //memset(ctx->b, 0, BUF_SIZE_8); - #pragma unroll - for (int am = 0; am < BUF_SIZE_8; am++) +#pragma unroll 1 + for (int z = 0; z < 63; ++z) + { + t += 128; + BlakeCompress((ulong *)h, (ulong *)b, t, 0UL); + +#pragma unroll + for (int x = 0; x < 16; ++x, ++ctr) { - ctx->b[am] = 0; + ((ulong *)b)[x] = as_ulong(as_uchar8(ctr).s76543210); } - B2B_IV(ctx->h); - + } + t += 128; + BlakeCompress((ulong *)h, (ulong *)b, t, 0UL); - ctx->h[0] ^= 0x01010000 ^ NUM_SIZE_8; - //memset(ctx->t, 0, 16); - ctx->t[0] = 0; - ctx->t[1] = 0; - ctx->c = 0; + ((ulong *)b)[0] = as_ulong(as_uchar8(ctr).s76543210); + t += 8; - - //====================================================================// - // Hash tid - //====================================================================// -#pragma unroll - for (j = 0; ctx->c < BUF_SIZE_8 && j < INDEX_SIZE_8; ++j) - { - ctx->b[ctx->c++] = ((const 
uint8_t *)&tid)[INDEX_SIZE_8 - j - 1]; - } - - //====================================================================// - // Hash height - //====================================================================// -#pragma unroll - for (j = 0; ctx->c < BUF_SIZE_8 && j < HEIGHT_SIZE ; ++j) - { - ctx->b[ctx->c++] = ((const uint8_t *)&height)[j/*HEIGHT_SIZE - j - 1*/]; - } - - //====================================================================// - // Hash constant message - //====================================================================// #pragma unroll - for (j = 0; ctx->c < BUF_SIZE_8 && j < CONST_MES_SIZE_8; ++j) - { - ctx->b[ctx->c++] - = ( - !((7 - (j & 7)) >> 1) - * ((j >> 3) >> (((~(j & 7)) & 1) << 3)) - ) & 0xFF; - } - - - while (j < CONST_MES_SIZE_8) - { - HOST_B2B_H(ctx, aux); - - for ( ; ctx->c < BUF_SIZE_8 && j < CONST_MES_SIZE_8; ++j) - { - ctx->b[ctx->c++] - = ( - !((7 - (j & 7)) >> 1) - * ((j >> 3) >> (((~(j & 7)) & 1) << 3)) - ) & 0xFF; - } - } - - - //====================================================================// - // Finalize hash - //====================================================================// - HOST_B2B_H_LAST(ctx, aux); -#pragma unroll - for (j = 0; j < NUM_SIZE_8; ++j) - { - ((uint8_t *)ldata)[NUM_SIZE_8 - j - 1] - = (ctx->h[j >> 3] >> ((j & 7) << 3)) & 0xFF; - } + for (int i = 1; i < 16; ++i) ((ulong *)b)[i] = 0UL; + BlakeCompress((ulong *)h, (ulong *)b, t, 0xFFFFFFFFFFFFFFFFUL); - //====================================================================// - // Dump result to global memory -- BIG ENDIAN - //====================================================================// + //====================================================================// + // Dump result to global memory -- BIG ENDIAN + //====================================================================// #pragma unroll - for (int i = 0; i < NUM_SIZE_8; ++i) - { - ((uint8_t __global *)hashes)[(tid + 1) * NUM_SIZE_8 - i - 1] - = ((uint8_t *)ldata)[i]; - } - 
- - j = ((cl_ulong *)ldata)[3] < Q3 - || ((cl_ulong *)ldata)[3] == Q3 && ( - ((cl_ulong *)ldata)[2] < Q2 - || ((cl_ulong *)ldata)[2] == Q2 && ( - ((cl_ulong *)ldata)[1] < Q1 - || ((cl_ulong *)ldata)[1] == Q1 - && ((cl_ulong *)ldata)[0] < Q0 - ) - ); - - - - #pragma unroll - for (int i = 0; i < NUM_SIZE_8-1; ++i) - { - //((uint8_t global*)hashes)[(tid + 1) * NUM_SIZE_8 - i -1] = ((uint8_t *)ldata)[i]; - ((uint8_t global*)hashes)[tid * NUM_SIZE_8 +i ] = ((uint8_t *)ldata)[i]; - } - // drop - ((uint8_t global*)hashes)[tid * NUM_SIZE_8 +31 ] = 0; + for (int i = 0; i < 4; ++i) ((__global ulong *)hashes)[(tid + 1) * 4 - i - 1] = as_ulong(as_uchar8(h[i]).s76543210); + + ((__global uchar *)hashes)[tid * 32 + 31] = 0; } - return; + return; } diff --git a/Ubuntu/config.json b/Ubuntu/config.json deleted file mode 100755 index 79743c2..0000000 --- a/Ubuntu/config.json +++ /dev/null @@ -1 +0,0 @@ -{ "node": "http://37.156.20.158:3028" } diff --git a/clMining.cpp b/clMining.cpp index 8c88330..2c4ccbc 100755 --- a/clMining.cpp +++ b/clMining.cpp @@ -4,7 +4,7 @@ MiningClass::MiningClass(CLWarpper *cll) { cl = cll; - const string buildOptions = " -cl-std=CL2 -I ."; + const string buildOptions = " -w -I ."; program = cl->buildProgramFromFile("MiningKernel.cl", buildOptions); int tr; diff --git a/clPreHash.cpp b/clPreHash.cpp index 7d1d5aa..1fa4532 100755 --- a/clPreHash.cpp +++ b/clPreHash.cpp @@ -3,7 +3,7 @@ PreHashClass::PreHashClass(CLWarpper *cll) { cl = cll; - const string buildOptions = " -w -I ."; + const string buildOptions = " -w -I ."; program = cl->buildProgramFromFile("PreHashKernel.cl", buildOptions); } diff --git a/config.json b/config.json new file mode 100644 index 0000000..2625f2f --- /dev/null +++ b/config.json @@ -0,0 +1 @@ +{ "node": "http://127.0.0.1:3000" } diff --git a/definitions.h b/definitions.h index 97161e4..c065a5c 100755 --- a/definitions.h +++ b/definitions.h @@ -43,6 +43,7 @@ struct info_t int keepPrehash; char to[MAX_URL_SIZE]; char 
endJob[MAX_URL_SIZE]; + bool doJob; uint8_t Hblock[HEIGHT_SIZE]; char stratumMode; diff --git a/ergoAutolykos.cpp b/ergoAutolykos.cpp index b2818f1..60639eb 100755 --- a/ergoAutolykos.cpp +++ b/ergoAutolykos.cpp @@ -180,6 +180,7 @@ void ergoAutolykos::MinerThread(CLWarpper *clw, const int deviceId, const int t cl_ulong base = 0; cl_ulong EndNonce = 0; cl_uint height = 0; + PreHashClass *ph = new PreHashClass(clw); MiningClass *min = new MiningClass(clw); @@ -224,6 +225,10 @@ void ergoAutolykos::MinerThread(CLWarpper *clw, const int deviceId, const int t state = STATE_CONTINUE; } + while (!info->doJob) + { + //LOG(INFO) << "GPU " << deviceId << " problem in proxy "; + } uint_t controlId = info->blockId.load(); @@ -271,9 +276,18 @@ void ergoAutolykos::MinerThread(CLWarpper *clw, const int deviceId, const int t ret = clw->CopyBuffer(data_d, hdata_d, (NUM_SIZE_8 ) * sizeof(char), false); + ch::milliseconds startP = ch::duration_cast<ch::milliseconds>( + ch::system_clock::now().time_since_epoch() + ); + //LOG(INFO) << "Starting prehashing with new block data"; ph->Prehash(height, hashes_d); + ch::milliseconds ms = ch::milliseconds::zero(); + ms = ch::duration_cast<ch::milliseconds>( + ch::system_clock::now().time_since_epoch() + ) - startP; + LOG(INFO) << "Prehash time: " << ms.count() << " ms"; //LOG(INFO) << "Starting InitMining"; //min->InitMining(&ctx_h, (cl_uint*)mes_h, NUM_SIZE_8); diff --git a/request.cc b/request.cc index 93b1546..72c0aab 100755 --- a/request.cc +++ b/request.cc @@ -85,7 +85,7 @@ void CurlLogError(CURLcode curl_status) // moved to separate function for tests /////////////////////////////////////////////////////////////////////////////// -int ParseRequest(json_t * oldreq, json_t * newreq, info_t *info, int checkPubKey) +int ParseRequest(json_t * oldreq, json_t * newreq, info_t *info, int checkPubKey, long http_code) { //LOG(INFO) << "Current block candidate: " << newreq->ptr; jsmn_parser parser; @@ -154,12 +154,21 @@ int ParseRequest(json_t * oldreq, json_t * newreq, info_t *info, 
int checkPubKey //(HPos == -1) ? info->AlgVer = 1 : info->AlgVer = 2; if ( BoundPos < 0 || MesPos < 0 || HPos < 0 ) { - LOG(ERROR) << "Some of expected fields not present in /block/candidate"; LOG(ERROR) << "Block data: " << newreq->ptr; + if (BoundPos < 0 && MesPos < 0 && HPos < 0 && http_code == 200) + { + LOG(ERROR) << "problem in proxy connection"; + info->doJob = false; + + } + else + { + LOG(ERROR) << "Some of expected fields not present in /block/candidate"; + } return EXIT_FAILURE; } - + info->doJob = true; if (checkPubKey) { if (newreq->GetTokenLen(PkPos) != PK_SIZE_4) @@ -416,6 +425,8 @@ int GetLatestBlock( CurlLogError(curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10L)); CurlLogError(curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30L)); curlError = curl_easy_perform(curl); + long http_code = 0; + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code); CurlLogError(curlError); curl_easy_cleanup(curl); @@ -426,7 +437,7 @@ int GetLatestBlock( { int oldId = info->blockId.load(); - if (ParseRequest(oldreq, &newreq, info, checkPubKey) != EXIT_SUCCESS) + if (ParseRequest(oldreq, &newreq, info, checkPubKey,http_code) != EXIT_SUCCESS) { return EXIT_FAILURE; } diff --git a/request.h b/request.h index de61a27..63691a6 100755 --- a/request.h +++ b/request.h @@ -32,7 +32,8 @@ int ParseRequest( json_t * oldreq , json_t * newreq, info_t *info, - int checkPubKey + int checkPubKey, + long http_code ); // CURL http GET request diff --git a/win64/ErgoOpenCL.exe b/win64/ErgoOpenCL.exe index dd3a935..690959b 100755 Binary files a/win64/ErgoOpenCL.exe and b/win64/ErgoOpenCL.exe differ diff --git a/win64/MiningKernel.cl b/win64/MiningKernel.cl index b41ed47..2d14195 100755 --- a/win64/MiningKernel.cl +++ b/win64/MiningKernel.cl @@ -68,10 +68,10 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base, ((cl_ulong *)(aux))[14] = ~((cl_ulong *)(aux))[14]; - ((cl_ulong *)(aux))[16] = ((cl_ulong *)data)[0]; - ((cl_ulong *)(aux))[17] = ((cl_ulong *)data)[1]; 
- ((cl_ulong *)(aux))[18] = ((cl_ulong *)data)[2]; - ((cl_ulong *)(aux))[19] = ((cl_ulong *)data)[3]; + ((cl_ulong *)(aux))[16] = ((global cl_ulong *)data)[0]; + ((cl_ulong *)(aux))[17] = ((global cl_ulong *)data)[1]; + ((cl_ulong *)(aux))[18] = ((global cl_ulong *)data)[2]; + ((cl_ulong *)(aux))[19] = ((global cl_ulong *)data)[3]; ((cl_ulong *)(aux))[20] = tmp; ((cl_ulong *)(aux))[21] = 0; ((cl_ulong *)(aux))[22] = 0; @@ -114,10 +114,12 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base, h3 = h2 % N_LEN; //--------------------------read hash from lookup + cl_uint tmpL; #pragma unroll 8 for (int i = 0; i < 8; ++i) { - reverseBytesInt(hashes[(h3 << 3) + i], r[7 - i]); + tmpL = hashes[(h3 << 3) + i]; + reverseBytesInt(tmpL, r[7 - i]); } //------------------------------------------------------ @@ -136,7 +138,7 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base, bT[j] = ((uint8_t *)r)[j + 1]; #pragma unroll for (j = 31; j < 63; ++j) - bT[j] = ((uint8_t *)data)[j - 31]; + bT[j] = ((global uint8_t *)data)[j - 31]; #pragma unroll for (j = 63; j < 71; ++j) bT[j] = ((uint8_t *)&tmp)[j - 63]; diff --git a/win64/PreHashKernel.cl b/win64/PreHashKernel.cl index e57b5bb..2ccd1a4 100755 --- a/win64/PreHashKernel.cl +++ b/win64/PreHashKernel.cl @@ -1,155 +1,174 @@ #include "OCLdecs.h"////problem with relative path + +__constant uint8_t blake2b_sigma[12][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 
6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + +#pragma OPENCL EXTENSION cl_amd_media_ops : enable +#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable + +ulong FAST_ROTL64_LO(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x, x.s10, 32 - y))); } +ulong FAST_ROTL64_HI(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x.s10, x, 32 - (y - 32)))); } + + +//#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +ulong rotr64(const ulong x2, const uint y) +{ + uint2 x = as_uint2(x2); + if (y < 32) return(as_ulong(amd_bitalign(x.s10, x, y))); + else return(as_ulong(amd_bitalign(x, x.s10, (y - 32)))); +} + +#define G(m, r, i, a, b, c, d) do { \ + a += b + ((ulong *)m)[blake2b_sigma[r][i]]; \ + d = rotr64(d ^ a, 32);\ + c += d; \ + b = rotr64(b ^ c, 24); \ + a += b + ((ulong *)m)[blake2b_sigma[r][i + 1]]; \ + d = rotr64(d ^ a, 16); \ + c += d; \ + b = rotr64(b ^ c, 63); \ +} while(0) + + +#define BLAKE2B_RND(v, r, m) do { \ + G(m, r, 0, v[ 0], v[ 4], v[ 8], v[12]); \ + G(m, r, 2, v[ 1], v[ 5], v[ 9], v[13]); \ + G(m, r, 4, v[ 2], v[ 6], v[10], v[14]); \ + G(m, r, 6, v[ 3], v[ 7], v[11], v[15]); \ + G(m, r, 8, v[ 0], v[ 5], v[10], v[15]); \ + G(m, r, 10, v[ 1], v[ 6], v[11], v[12]); \ + G(m, r, 12, v[ 2], v[ 7], v[ 8], v[13]); \ + G(m, r, 14, v[ 3], v[ 4], v[ 9], v[14]); \ +} while(0) + + +inline void BlakeCompress(ulong *h, const ulong *m, ulong t, ulong f) +{ + ulong v[16]; + + ((ulong8 *)v)[0] = ((ulong8 *)h)[0]; + ((ulong8 *)v)[1] = (ulong8)(0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL); + + v[12] ^= t; + v[14] ^= f; + +#pragma unroll + for (int rnd = 0; rnd < 12; ++rnd) + { + BLAKE2B_RND(v, rnd, m); + } + + h[0] ^= v[0] ^ v[0 + 8]; + h[1] ^= v[1] ^ v[1 + 8]; + h[2] ^= v[2] ^ v[2 + 8]; + h[3] ^= 
v[3] ^ v[3 + 8]; + h[4] ^= v[4] ^ v[4 + 8]; + h[5] ^= v[5] ^ v[5 + 8]; + h[6] ^= v[6] ^ v[6 + 8]; + h[7] ^= v[7] ^ v[7 + 8]; +} + + //////////////////////////////////////////////////////////////////////////////// // First iteration of hashes precalculation //////////////////////////////////////////////////////////////////////////////// __kernel void InitPrehash( - // data: height - const cl_uint h, - // hashes - global cl_uint * hashes + // data: height + const cl_uint h, + // hashes + global cl_uint * hashes ) { - cl_uint tid =get_local_id(0); + cl_uint tid = get_local_id(0); - // shared memory - cl_uint height = h; + // shared memory + cl_uint height = h; + tid = get_global_id(0); + if (tid < N_LEN) + { + ulong h[8]; + ulong b[16]; + ulong t = 0; + //====================================================================// + // Initialize context + //====================================================================// + ((ulong8 *)h)[0] = (ulong8)(0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL); - tid = get_global_id(0); + h[0] ^= 0x01010020; - if (tid < N_LEN) - { + //====================================================================// + // Hash tid + //====================================================================// - cl_uint j; + ((uint *)b)[0] = as_uint(as_uchar4(tid).s3210); + //====================================================================// + // Hash height + //====================================================================// + ((uint *)b)[1] = height; - // local memory - // 472 bytes - cl_uint ldata[118]; + //====================================================================// + // Hash constant message + //====================================================================// - // 32 * 64 bits = 256 bytes - cl_ulong * aux = (cl_ulong *)ldata; - // (212 + 4) bytes - ctx_t * ctx = (ctx_t *)(ldata + 64); + ulong 
ctr = 0; + for (int x = 1; x < 16; ++x, ++ctr) + { + ((ulong *)b)[x] = as_ulong(as_uchar8(ctr).s76543210); + } - //====================================================================// - // Initialize context - //====================================================================// - //memset(ctx->b, 0, BUF_SIZE_8); - #pragma unroll - for (int am = 0; am < BUF_SIZE_8; am++) +#pragma unroll 1 + for (int z = 0; z < 63; ++z) + { + t += 128; + BlakeCompress((ulong *)h, (ulong *)b, t, 0UL); + +#pragma unroll + for (int x = 0; x < 16; ++x, ++ctr) { - ctx->b[am] = 0; + ((ulong *)b)[x] = as_ulong(as_uchar8(ctr).s76543210); } - B2B_IV(ctx->h); - + } + t += 128; + BlakeCompress((ulong *)h, (ulong *)b, t, 0UL); - ctx->h[0] ^= 0x01010000 ^ NUM_SIZE_8; - //memset(ctx->t, 0, 16); - ctx->t[0] = 0; - ctx->t[1] = 0; - ctx->c = 0; + ((ulong *)b)[0] = as_ulong(as_uchar8(ctr).s76543210); + t += 8; - - //====================================================================// - // Hash tid - //====================================================================// -#pragma unroll - for (j = 0; ctx->c < BUF_SIZE_8 && j < INDEX_SIZE_8; ++j) - { - ctx->b[ctx->c++] = ((const uint8_t *)&tid)[INDEX_SIZE_8 - j - 1]; - } - - //====================================================================// - // Hash height - //====================================================================// -#pragma unroll - for (j = 0; ctx->c < BUF_SIZE_8 && j < HEIGHT_SIZE ; ++j) - { - ctx->b[ctx->c++] = ((const uint8_t *)&height)[j/*HEIGHT_SIZE - j - 1*/]; - } - - //====================================================================// - // Hash constant message - //====================================================================// #pragma unroll - for (j = 0; ctx->c < BUF_SIZE_8 && j < CONST_MES_SIZE_8; ++j) - { - ctx->b[ctx->c++] - = ( - !((7 - (j & 7)) >> 1) - * ((j >> 3) >> (((~(j & 7)) & 1) << 3)) - ) & 0xFF; - } - - - while (j < CONST_MES_SIZE_8) - { - HOST_B2B_H(ctx, aux); - - for ( ; ctx->c < 
BUF_SIZE_8 && j < CONST_MES_SIZE_8; ++j) - { - ctx->b[ctx->c++] - = ( - !((7 - (j & 7)) >> 1) - * ((j >> 3) >> (((~(j & 7)) & 1) << 3)) - ) & 0xFF; - } - } - - - //====================================================================// - // Finalize hash - //====================================================================// - HOST_B2B_H_LAST(ctx, aux); -#pragma unroll - for (j = 0; j < NUM_SIZE_8; ++j) - { - ((uint8_t *)ldata)[NUM_SIZE_8 - j - 1] - = (ctx->h[j >> 3] >> ((j & 7) << 3)) & 0xFF; - } + for (int i = 1; i < 16; ++i) ((ulong *)b)[i] = 0UL; + BlakeCompress((ulong *)h, (ulong *)b, t, 0xFFFFFFFFFFFFFFFFUL); - //====================================================================// - // Dump result to global memory -- BIG ENDIAN - //====================================================================// + //====================================================================// + // Dump result to global memory -- BIG ENDIAN + //====================================================================// #pragma unroll - for (int i = 0; i < NUM_SIZE_8; ++i) - { - ((uint8_t __global *)hashes)[(tid + 1) * NUM_SIZE_8 - i - 1] - = ((uint8_t *)ldata)[i]; - } - - - j = ((cl_ulong *)ldata)[3] < Q3 - || ((cl_ulong *)ldata)[3] == Q3 && ( - ((cl_ulong *)ldata)[2] < Q2 - || ((cl_ulong *)ldata)[2] == Q2 && ( - ((cl_ulong *)ldata)[1] < Q1 - || ((cl_ulong *)ldata)[1] == Q1 - && ((cl_ulong *)ldata)[0] < Q0 - ) - ); - - - - #pragma unroll - for (int i = 0; i < NUM_SIZE_8-1; ++i) - { - //((uint8_t global*)hashes)[(tid + 1) * NUM_SIZE_8 - i -1] = ((uint8_t *)ldata)[i]; - ((uint8_t global*)hashes)[tid * NUM_SIZE_8 +i ] = ((uint8_t *)ldata)[i]; - } - // drop - ((uint8_t global*)hashes)[tid * NUM_SIZE_8 +31 ] = 0; + for (int i = 0; i < 4; ++i) ((__global ulong *)hashes)[(tid + 1) * 4 - i - 1] = as_ulong(as_uchar8(h[i]).s76543210); + + ((__global uchar *)hashes)[tid * 32 + 31] = 0; } - return; + return; } diff --git a/win64/config.json b/win64/config.json deleted file mode 100755 index 
a64edc7..0000000 --- a/win64/config.json +++ /dev/null @@ -1 +0,0 @@ -{ "node": "http://88.198.13.202:9052" } diff --git a/win64/libcurl.dll b/win64/libcurl.dll new file mode 100644 index 0000000..9452a8f Binary files /dev/null and b/win64/libcurl.dll differ