diff --git a/ErgoOpenCL.vcxproj b/ErgoOpenCL.vcxproj
index f3a200c..96edff3 100755
--- a/ErgoOpenCL.vcxproj
+++ b/ErgoOpenCL.vcxproj
@@ -22,7 +22,7 @@
15.0
{19C0E2B9-63F8-4C92-B426-8D0F7E4248BC}
ErgoOpenCL
- 8.1
+ 10.0.17763.0
@@ -41,13 +41,13 @@
Application
true
- v140
+ v141
MultiByte
Application
false
- v140
+ v141
true
MultiByte
@@ -70,10 +70,10 @@
- C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;E:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath)
+ C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath)
- C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;E:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath)
+ C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath)
@@ -93,7 +93,7 @@
WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
- C:\OpenSSL-Win64\lib;E:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories)
+ C:\OpenSSL-Win64\lib;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories)
OpenCL.lib;libcurl.lib;libeay32.lib;%(AdditionalDependencies)
@@ -126,7 +126,7 @@
true
true
OpenCL.lib;libcurl.lib;libeay32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
- C:\OpenSSL-Win64\lib;E:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories)
+ C:\OpenSSL-Win64\lib;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories)
diff --git a/MiningKernel.cl b/MiningKernel.cl
index b41ed47..2d14195 100755
--- a/MiningKernel.cl
+++ b/MiningKernel.cl
@@ -68,10 +68,10 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
((cl_ulong *)(aux))[14] = ~((cl_ulong *)(aux))[14];
- ((cl_ulong *)(aux))[16] = ((cl_ulong *)data)[0];
- ((cl_ulong *)(aux))[17] = ((cl_ulong *)data)[1];
- ((cl_ulong *)(aux))[18] = ((cl_ulong *)data)[2];
- ((cl_ulong *)(aux))[19] = ((cl_ulong *)data)[3];
+ ((cl_ulong *)(aux))[16] = ((global cl_ulong *)data)[0];
+ ((cl_ulong *)(aux))[17] = ((global cl_ulong *)data)[1];
+ ((cl_ulong *)(aux))[18] = ((global cl_ulong *)data)[2];
+ ((cl_ulong *)(aux))[19] = ((global cl_ulong *)data)[3];
((cl_ulong *)(aux))[20] = tmp;
((cl_ulong *)(aux))[21] = 0;
((cl_ulong *)(aux))[22] = 0;
@@ -114,10 +114,12 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
h3 = h2 % N_LEN;
//--------------------------read hash from lookup
+ cl_uint tmpL;
#pragma unroll 8
for (int i = 0; i < 8; ++i)
{
- reverseBytesInt(hashes[(h3 << 3) + i], r[7 - i]);
+ tmpL = hashes[(h3 << 3) + i];
+ reverseBytesInt(tmpL, r[7 - i]);
}
//------------------------------------------------------
@@ -136,7 +138,7 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
bT[j] = ((uint8_t *)r)[j + 1];
#pragma unroll
for (j = 31; j < 63; ++j)
- bT[j] = ((uint8_t *)data)[j - 31];
+ bT[j] = ((global uint8_t *)data)[j - 31];
#pragma unroll
for (j = 63; j < 71; ++j)
bT[j] = ((uint8_t *)&tmp)[j - 63];
diff --git a/PreHashKernel.cl b/PreHashKernel.cl
index e57b5bb..2ccd1a4 100755
--- a/PreHashKernel.cl
+++ b/PreHashKernel.cl
@@ -1,155 +1,174 @@
#include "OCLdecs.h"////problem with relative path
+// BLAKE2b message-word permutation table (sigma of RFC 7693), read as blake2b_sigma[round][i] by the G macro; rows 10 and 11 repeat rows 0 and 1.
+__constant uint8_t blake2b_sigma[12][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
+};
+// Enable the AMD media-ops extensions; amd_bitalign, used by the rotate helpers below, is provided by cl_amd_media_ops.
+#pragma OPENCL EXTENSION cl_amd_media_ops : enable
+#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
+// 64-bit rotate-left helpers built on amd_bitalign: _LO looks intended for 0 < y < 32 and _HI for 32 <= y < 64 — TODO confirm. NOTE(review): neither is called in the code visible here.
+ulong FAST_ROTL64_LO(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x, x.s10, 32 - y))); }
+ulong FAST_ROTL64_HI(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x.s10, x, 32 - (y - 32)))); }
+
+// Rotates the 64-bit value x2 right by y bits using amd_bitalign on its two 32-bit halves; the commented-out macro below is the plain-shift form it stands in for.
+//#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
+ulong rotr64(const ulong x2, const uint y)
+{
+ uint2 x = as_uint2(x2); // x.s0 and x.s1 are the two 32-bit halves of x2
+ if (y < 32) return(as_ulong(amd_bitalign(x.s10, x, y))); // shift amount used directly
+ else return(as_ulong(amd_bitalign(x, x.s10, (y - 32)))); // halves swapped, shift amount reduced by 32
+}
+// BLAKE2b G mixing step on (a, b, c, d): adds message words m[sigma[r][i]] and m[sigma[r][i + 1]] and rotates by 32, 24, 16 and 63. Arguments are expanded several times, so pass side-effect-free expressions.
+#define G(m, r, i, a, b, c, d) do { \
+ a += b + ((ulong *)m)[blake2b_sigma[r][i]]; \
+ d = rotr64(d ^ a, 32);\
+ c += d; \
+ b = rotr64(b ^ c, 24); \
+ a += b + ((ulong *)m)[blake2b_sigma[r][i + 1]]; \
+ d = rotr64(d ^ a, 16); \
+ c += d; \
+ b = rotr64(b ^ c, 63); \
+} while(0)
+
+// One BLAKE2b round r over the 16-word state v: four G steps on the index sets {0,4,8,12} .. {3,7,11,15}, then four on {0,5,10,15} .. {3,4,9,14}.
+#define BLAKE2B_RND(v, r, m) do { \
+ G(m, r, 0, v[ 0], v[ 4], v[ 8], v[12]); \
+ G(m, r, 2, v[ 1], v[ 5], v[ 9], v[13]); \
+ G(m, r, 4, v[ 2], v[ 6], v[10], v[14]); \
+ G(m, r, 6, v[ 3], v[ 7], v[11], v[15]); \
+ G(m, r, 8, v[ 0], v[ 5], v[10], v[15]); \
+ G(m, r, 10, v[ 1], v[ 6], v[11], v[12]); \
+ G(m, r, 12, v[ 2], v[ 7], v[ 8], v[13]); \
+ G(m, r, 14, v[ 3], v[ 4], v[ 9], v[14]); \
+} while(0)
+
+// BLAKE2b compression: folds the 16-word message block m into the 8-word state h. t is XORed into v[12] and f into v[14] before the 12 rounds. NOTE(review): the ulong8 casts assume h and v are suitably aligned — confirm.
+inline void BlakeCompress(ulong *h, const ulong *m, ulong t, ulong f)
+{
+ ulong v[16];
+
+ ((ulong8 *)v)[0] = ((ulong8 *)h)[0]; // v[0..7] = h[0..7]
+ ((ulong8 *)v)[1] = (ulong8)(0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL); // v[8..15] = the BLAKE2b IV words
+
+ v[12] ^= t; // only one 64-bit counter word is mixed in; v[13] is left untouched
+ v[14] ^= f; // callers in this file pass 0 or all-ones
+
+#pragma unroll
+ for (int rnd = 0; rnd < 12; ++rnd)
+ {
+ BLAKE2B_RND(v, rnd, m);
+ }
+
+ h[0] ^= v[0] ^ v[0 + 8]; // feed-forward: h[i] ^= v[i] ^ v[i + 8] for i = 0..7
+ h[1] ^= v[1] ^ v[1 + 8];
+ h[2] ^= v[2] ^ v[2 + 8];
+ h[3] ^= v[3] ^ v[3 + 8];
+ h[4] ^= v[4] ^ v[4 + 8];
+ h[5] ^= v[5] ^ v[5 + 8];
+ h[6] ^= v[6] ^ v[6 + 8];
+ h[7] ^= v[7] ^ v[7 + 8];
+}
+
+
////////////////////////////////////////////////////////////////////////////////
// First iteration of hashes precalculation
////////////////////////////////////////////////////////////////////////////////
__kernel void InitPrehash(
- // data: height
- const cl_uint h,
- // hashes
- global cl_uint * hashes
+ // data: height (stored in the second 32-bit word of the first message block)
+ const cl_uint h,
+ // hashes: output buffer; each work-item with tid < N_LEN writes the 32 bytes starting at byte offset tid * 32
+ global cl_uint * hashes
)
{
- cl_uint tid =get_local_id(0);
+ cl_uint tid = get_local_id(0); // replaced by get_global_id(0) below before it is read
- // shared memory
- cl_uint height = h;
+ // copy of the kernel argument h; inside the if-block the name h is redeclared as the hash state
+ cl_uint height = h;
+ tid = get_global_id(0);
+ if (tid < N_LEN)
+ {
+ ulong h[8]; // hash state (shadows the kernel argument h)
+ ulong b[16]; // one 128-byte message block
+ ulong t = 0; // number of message bytes handed to BlakeCompress so far
+ //====================================================================//
+ // Initialize context
+ //====================================================================//
+ ((ulong8 *)h)[0] = (ulong8)(0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL);
- tid = get_global_id(0);
+ h[0] ^= 0x01010020; // BLAKE2b parameter word: presumably digest length 0x20, no key, fanout 1, depth 1 (RFC 7693) — confirm
- if (tid < N_LEN)
- {
+ //====================================================================//
+ // Hash tid
+ //====================================================================//
- cl_uint j;
+ ((uint *)b)[0] = as_uint(as_uchar4(tid).s3210); // first 32-bit word: tid with its four bytes reversed
+ //====================================================================//
+ // Hash height
+ //====================================================================//
+ ((uint *)b)[1] = height; // second 32-bit word: height, stored without a byte swap
- // local memory
- // 472 bytes
- cl_uint ldata[118];
+ //====================================================================//
+ // Hash constant message
+ //====================================================================//
- // 32 * 64 bits = 256 bytes
- cl_ulong * aux = (cl_ulong *)ldata;
- // (212 + 4) bytes
- ctx_t * ctx = (ctx_t *)(ldata + 64);
+ ulong ctr = 0; // counter whose byte-reversed values make up the rest of the message
+ for (int x = 1; x < 16; ++x, ++ctr) // fills b[1..15] with counter values 0..14
+ {
+ ((ulong *)b)[x] = as_ulong(as_uchar8(ctr).s76543210);
+ }
- //====================================================================//
- // Initialize context
- //====================================================================//
- //memset(ctx->b, 0, BUF_SIZE_8);
- #pragma unroll
- for (int am = 0; am < BUF_SIZE_8; am++)
+#pragma unroll 1
+ for (int z = 0; z < 63; ++z) // 63 times: compress the current block, then refill all 16 words
+ {
+ t += 128; // one full 128-byte block
+ BlakeCompress((ulong *)h, (ulong *)b, t, 0UL);
+ // refill the block with the next 16 counter values
+#pragma unroll
+ for (int x = 0; x < 16; ++x, ++ctr)
{
- ctx->b[am] = 0;
+ ((ulong *)b)[x] = as_ulong(as_uchar8(ctr).s76543210);
}
- B2B_IV(ctx->h);
-
+ }
+ t += 128; // the block filled by the last loop iteration
+ BlakeCompress((ulong *)h, (ulong *)b, t, 0UL);
- ctx->h[0] ^= 0x01010000 ^ NUM_SIZE_8;
- //memset(ctx->t, 0, 16);
- ctx->t[0] = 0;
- ctx->t[1] = 0;
- ctx->c = 0;
+ ((ulong *)b)[0] = as_ulong(as_uchar8(ctr).s76543210); // final partial block: one more counter value
+ t += 8; // only 8 message bytes in the final block
-
- //====================================================================//
- // Hash tid
- //====================================================================//
-#pragma unroll
- for (j = 0; ctx->c < BUF_SIZE_8 && j < INDEX_SIZE_8; ++j)
- {
- ctx->b[ctx->c++] = ((const uint8_t *)&tid)[INDEX_SIZE_8 - j - 1];
- }
-
- //====================================================================//
- // Hash height
- //====================================================================//
-#pragma unroll
- for (j = 0; ctx->c < BUF_SIZE_8 && j < HEIGHT_SIZE ; ++j)
- {
- ctx->b[ctx->c++] = ((const uint8_t *)&height)[j/*HEIGHT_SIZE - j - 1*/];
- }
-
- //====================================================================//
- // Hash constant message
- //====================================================================//
#pragma unroll
- for (j = 0; ctx->c < BUF_SIZE_8 && j < CONST_MES_SIZE_8; ++j)
- {
- ctx->b[ctx->c++]
- = (
- !((7 - (j & 7)) >> 1)
- * ((j >> 3) >> (((~(j & 7)) & 1) << 3))
- ) & 0xFF;
- }
-
-
- while (j < CONST_MES_SIZE_8)
- {
- HOST_B2B_H(ctx, aux);
-
- for ( ; ctx->c < BUF_SIZE_8 && j < CONST_MES_SIZE_8; ++j)
- {
- ctx->b[ctx->c++]
- = (
- !((7 - (j & 7)) >> 1)
- * ((j >> 3) >> (((~(j & 7)) & 1) << 3))
- ) & 0xFF;
- }
- }
-
-
- //====================================================================//
- // Finalize hash
- //====================================================================//
- HOST_B2B_H_LAST(ctx, aux);
-#pragma unroll
- for (j = 0; j < NUM_SIZE_8; ++j)
- {
- ((uint8_t *)ldata)[NUM_SIZE_8 - j - 1]
- = (ctx->h[j >> 3] >> ((j & 7) << 3)) & 0xFF;
- }
+ for (int i = 1; i < 16; ++i) ((ulong *)b)[i] = 0UL; // zero the remaining 15 words of the final block
+ BlakeCompress((ulong *)h, (ulong *)b, t, 0xFFFFFFFFFFFFFFFFUL); // the only call with a non-zero f (last block)
- //====================================================================//
- // Dump result to global memory -- BIG ENDIAN
- //====================================================================//
+ //====================================================================//
+ // Dump result to global memory -- BIG ENDIAN
+ //====================================================================//
#pragma unroll
- for (int i = 0; i < NUM_SIZE_8; ++i)
- {
- ((uint8_t __global *)hashes)[(tid + 1) * NUM_SIZE_8 - i - 1]
- = ((uint8_t *)ldata)[i];
- }
-
-
- j = ((cl_ulong *)ldata)[3] < Q3
- || ((cl_ulong *)ldata)[3] == Q3 && (
- ((cl_ulong *)ldata)[2] < Q2
- || ((cl_ulong *)ldata)[2] == Q2 && (
- ((cl_ulong *)ldata)[1] < Q1
- || ((cl_ulong *)ldata)[1] == Q1
- && ((cl_ulong *)ldata)[0] < Q0
- )
- );
-
-
-
- #pragma unroll
- for (int i = 0; i < NUM_SIZE_8-1; ++i)
- {
- //((uint8_t global*)hashes)[(tid + 1) * NUM_SIZE_8 - i -1] = ((uint8_t *)ldata)[i];
- ((uint8_t global*)hashes)[tid * NUM_SIZE_8 +i ] = ((uint8_t *)ldata)[i];
- }
- // drop
- ((uint8_t global*)hashes)[tid * NUM_SIZE_8 +31 ] = 0;
+ for (int i = 0; i < 4; ++i) ((__global ulong *)hashes)[(tid + 1) * 4 - i - 1] = as_ulong(as_uchar8(h[i]).s76543210); // h[0..3] byte-reversed; h[0] goes to the highest of the four ulong slots
+ // clear the last byte (offset 31) of this work-item's 32-byte slot
+ ((__global uchar *)hashes)[tid * 32 + 31] = 0;
}
- return;
+ return;
}
diff --git a/Ubuntu/ErgoOpenCL b/Ubuntu/ErgoOpenCL
deleted file mode 100755
index beff66b..0000000
Binary files a/Ubuntu/ErgoOpenCL and /dev/null differ
diff --git a/Ubuntu/MiningKernel.cl b/Ubuntu/MiningKernel.cl
index b41ed47..2d14195 100755
--- a/Ubuntu/MiningKernel.cl
+++ b/Ubuntu/MiningKernel.cl
@@ -68,10 +68,10 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
((cl_ulong *)(aux))[14] = ~((cl_ulong *)(aux))[14];
- ((cl_ulong *)(aux))[16] = ((cl_ulong *)data)[0];
- ((cl_ulong *)(aux))[17] = ((cl_ulong *)data)[1];
- ((cl_ulong *)(aux))[18] = ((cl_ulong *)data)[2];
- ((cl_ulong *)(aux))[19] = ((cl_ulong *)data)[3];
+ ((cl_ulong *)(aux))[16] = ((global cl_ulong *)data)[0];
+ ((cl_ulong *)(aux))[17] = ((global cl_ulong *)data)[1];
+ ((cl_ulong *)(aux))[18] = ((global cl_ulong *)data)[2];
+ ((cl_ulong *)(aux))[19] = ((global cl_ulong *)data)[3];
((cl_ulong *)(aux))[20] = tmp;
((cl_ulong *)(aux))[21] = 0;
((cl_ulong *)(aux))[22] = 0;
@@ -114,10 +114,12 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
h3 = h2 % N_LEN;
//--------------------------read hash from lookup
+ cl_uint tmpL;
#pragma unroll 8
for (int i = 0; i < 8; ++i)
{
- reverseBytesInt(hashes[(h3 << 3) + i], r[7 - i]);
+ tmpL = hashes[(h3 << 3) + i];
+ reverseBytesInt(tmpL, r[7 - i]);
}
//------------------------------------------------------
@@ -136,7 +138,7 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
bT[j] = ((uint8_t *)r)[j + 1];
#pragma unroll
for (j = 31; j < 63; ++j)
- bT[j] = ((uint8_t *)data)[j - 31];
+ bT[j] = ((global uint8_t *)data)[j - 31];
#pragma unroll
for (j = 63; j < 71; ++j)
bT[j] = ((uint8_t *)&tmp)[j - 63];
diff --git a/Ubuntu/PreHashKernel.cl b/Ubuntu/PreHashKernel.cl
index e57b5bb..2ccd1a4 100755
--- a/Ubuntu/PreHashKernel.cl
+++ b/Ubuntu/PreHashKernel.cl
@@ -1,155 +1,174 @@
#include "OCLdecs.h"////problem with relative path
+// BLAKE2b message-word permutation table (sigma of RFC 7693), read as blake2b_sigma[round][i] by the G macro; rows 10 and 11 repeat rows 0 and 1.
+__constant uint8_t blake2b_sigma[12][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
+};
+// Enable the AMD media-ops extensions; amd_bitalign, used by the rotate helpers below, is provided by cl_amd_media_ops.
+#pragma OPENCL EXTENSION cl_amd_media_ops : enable
+#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
+// 64-bit rotate-left helpers built on amd_bitalign: _LO looks intended for 0 < y < 32 and _HI for 32 <= y < 64 — TODO confirm. NOTE(review): neither is called in the code visible here.
+ulong FAST_ROTL64_LO(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x, x.s10, 32 - y))); }
+ulong FAST_ROTL64_HI(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x.s10, x, 32 - (y - 32)))); }
+
+// Rotates the 64-bit value x2 right by y bits using amd_bitalign on its two 32-bit halves; the commented-out macro below is the plain-shift form it stands in for.
+//#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
+ulong rotr64(const ulong x2, const uint y)
+{
+ uint2 x = as_uint2(x2); // x.s0 and x.s1 are the two 32-bit halves of x2
+ if (y < 32) return(as_ulong(amd_bitalign(x.s10, x, y))); // shift amount used directly
+ else return(as_ulong(amd_bitalign(x, x.s10, (y - 32)))); // halves swapped, shift amount reduced by 32
+}
+// BLAKE2b G mixing step on (a, b, c, d): adds message words m[sigma[r][i]] and m[sigma[r][i + 1]] and rotates by 32, 24, 16 and 63. Arguments are expanded several times, so pass side-effect-free expressions.
+#define G(m, r, i, a, b, c, d) do { \
+ a += b + ((ulong *)m)[blake2b_sigma[r][i]]; \
+ d = rotr64(d ^ a, 32);\
+ c += d; \
+ b = rotr64(b ^ c, 24); \
+ a += b + ((ulong *)m)[blake2b_sigma[r][i + 1]]; \
+ d = rotr64(d ^ a, 16); \
+ c += d; \
+ b = rotr64(b ^ c, 63); \
+} while(0)
+
+// One BLAKE2b round r over the 16-word state v: four G steps on the index sets {0,4,8,12} .. {3,7,11,15}, then four on {0,5,10,15} .. {3,4,9,14}.
+#define BLAKE2B_RND(v, r, m) do { \
+ G(m, r, 0, v[ 0], v[ 4], v[ 8], v[12]); \
+ G(m, r, 2, v[ 1], v[ 5], v[ 9], v[13]); \
+ G(m, r, 4, v[ 2], v[ 6], v[10], v[14]); \
+ G(m, r, 6, v[ 3], v[ 7], v[11], v[15]); \
+ G(m, r, 8, v[ 0], v[ 5], v[10], v[15]); \
+ G(m, r, 10, v[ 1], v[ 6], v[11], v[12]); \
+ G(m, r, 12, v[ 2], v[ 7], v[ 8], v[13]); \
+ G(m, r, 14, v[ 3], v[ 4], v[ 9], v[14]); \
+} while(0)
+
+// BLAKE2b compression: folds the 16-word message block m into the 8-word state h. t is XORed into v[12] and f into v[14] before the 12 rounds. NOTE(review): the ulong8 casts assume h and v are suitably aligned — confirm.
+inline void BlakeCompress(ulong *h, const ulong *m, ulong t, ulong f)
+{
+ ulong v[16];
+
+ ((ulong8 *)v)[0] = ((ulong8 *)h)[0]; // v[0..7] = h[0..7]
+ ((ulong8 *)v)[1] = (ulong8)(0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL); // v[8..15] = the BLAKE2b IV words
+
+ v[12] ^= t; // only one 64-bit counter word is mixed in; v[13] is left untouched
+ v[14] ^= f; // callers in this file pass 0 or all-ones
+
+#pragma unroll
+ for (int rnd = 0; rnd < 12; ++rnd)
+ {
+ BLAKE2B_RND(v, rnd, m);
+ }
+
+ h[0] ^= v[0] ^ v[0 + 8]; // feed-forward: h[i] ^= v[i] ^ v[i + 8] for i = 0..7
+ h[1] ^= v[1] ^ v[1 + 8];
+ h[2] ^= v[2] ^ v[2 + 8];
+ h[3] ^= v[3] ^ v[3 + 8];
+ h[4] ^= v[4] ^ v[4 + 8];
+ h[5] ^= v[5] ^ v[5 + 8];
+ h[6] ^= v[6] ^ v[6 + 8];
+ h[7] ^= v[7] ^ v[7 + 8];
+}
+
+
////////////////////////////////////////////////////////////////////////////////
// First iteration of hashes precalculation
////////////////////////////////////////////////////////////////////////////////
__kernel void InitPrehash(
- // data: height
- const cl_uint h,
- // hashes
- global cl_uint * hashes
+ // data: height (stored in the second 32-bit word of the first message block)
+ const cl_uint h,
+ // hashes: output buffer; each work-item with tid < N_LEN writes the 32 bytes starting at byte offset tid * 32
+ global cl_uint * hashes
)
{
- cl_uint tid =get_local_id(0);
+ cl_uint tid = get_local_id(0); // replaced by get_global_id(0) below before it is read
- // shared memory
- cl_uint height = h;
+ // copy of the kernel argument h; inside the if-block the name h is redeclared as the hash state
+ cl_uint height = h;
+ tid = get_global_id(0);
+ if (tid < N_LEN)
+ {
+ ulong h[8]; // hash state (shadows the kernel argument h)
+ ulong b[16]; // one 128-byte message block
+ ulong t = 0; // number of message bytes handed to BlakeCompress so far
+ //====================================================================//
+ // Initialize context
+ //====================================================================//
+ ((ulong8 *)h)[0] = (ulong8)(0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL);
- tid = get_global_id(0);
+ h[0] ^= 0x01010020; // BLAKE2b parameter word: presumably digest length 0x20, no key, fanout 1, depth 1 (RFC 7693) — confirm
- if (tid < N_LEN)
- {
+ //====================================================================//
+ // Hash tid
+ //====================================================================//
- cl_uint j;
+ ((uint *)b)[0] = as_uint(as_uchar4(tid).s3210); // first 32-bit word: tid with its four bytes reversed
+ //====================================================================//
+ // Hash height
+ //====================================================================//
+ ((uint *)b)[1] = height; // second 32-bit word: height, stored without a byte swap
- // local memory
- // 472 bytes
- cl_uint ldata[118];
+ //====================================================================//
+ // Hash constant message
+ //====================================================================//
- // 32 * 64 bits = 256 bytes
- cl_ulong * aux = (cl_ulong *)ldata;
- // (212 + 4) bytes
- ctx_t * ctx = (ctx_t *)(ldata + 64);
+ ulong ctr = 0; // counter whose byte-reversed values make up the rest of the message
+ for (int x = 1; x < 16; ++x, ++ctr) // fills b[1..15] with counter values 0..14
+ {
+ ((ulong *)b)[x] = as_ulong(as_uchar8(ctr).s76543210);
+ }
- //====================================================================//
- // Initialize context
- //====================================================================//
- //memset(ctx->b, 0, BUF_SIZE_8);
- #pragma unroll
- for (int am = 0; am < BUF_SIZE_8; am++)
+#pragma unroll 1
+ for (int z = 0; z < 63; ++z) // 63 times: compress the current block, then refill all 16 words
+ {
+ t += 128; // one full 128-byte block
+ BlakeCompress((ulong *)h, (ulong *)b, t, 0UL);
+ // refill the block with the next 16 counter values
+#pragma unroll
+ for (int x = 0; x < 16; ++x, ++ctr)
{
- ctx->b[am] = 0;
+ ((ulong *)b)[x] = as_ulong(as_uchar8(ctr).s76543210);
}
- B2B_IV(ctx->h);
-
+ }
+ t += 128; // the block filled by the last loop iteration
+ BlakeCompress((ulong *)h, (ulong *)b, t, 0UL);
- ctx->h[0] ^= 0x01010000 ^ NUM_SIZE_8;
- //memset(ctx->t, 0, 16);
- ctx->t[0] = 0;
- ctx->t[1] = 0;
- ctx->c = 0;
+ ((ulong *)b)[0] = as_ulong(as_uchar8(ctr).s76543210); // final partial block: one more counter value
+ t += 8; // only 8 message bytes in the final block
-
- //====================================================================//
- // Hash tid
- //====================================================================//
-#pragma unroll
- for (j = 0; ctx->c < BUF_SIZE_8 && j < INDEX_SIZE_8; ++j)
- {
- ctx->b[ctx->c++] = ((const uint8_t *)&tid)[INDEX_SIZE_8 - j - 1];
- }
-
- //====================================================================//
- // Hash height
- //====================================================================//
-#pragma unroll
- for (j = 0; ctx->c < BUF_SIZE_8 && j < HEIGHT_SIZE ; ++j)
- {
- ctx->b[ctx->c++] = ((const uint8_t *)&height)[j/*HEIGHT_SIZE - j - 1*/];
- }
-
- //====================================================================//
- // Hash constant message
- //====================================================================//
#pragma unroll
- for (j = 0; ctx->c < BUF_SIZE_8 && j < CONST_MES_SIZE_8; ++j)
- {
- ctx->b[ctx->c++]
- = (
- !((7 - (j & 7)) >> 1)
- * ((j >> 3) >> (((~(j & 7)) & 1) << 3))
- ) & 0xFF;
- }
-
-
- while (j < CONST_MES_SIZE_8)
- {
- HOST_B2B_H(ctx, aux);
-
- for ( ; ctx->c < BUF_SIZE_8 && j < CONST_MES_SIZE_8; ++j)
- {
- ctx->b[ctx->c++]
- = (
- !((7 - (j & 7)) >> 1)
- * ((j >> 3) >> (((~(j & 7)) & 1) << 3))
- ) & 0xFF;
- }
- }
-
-
- //====================================================================//
- // Finalize hash
- //====================================================================//
- HOST_B2B_H_LAST(ctx, aux);
-#pragma unroll
- for (j = 0; j < NUM_SIZE_8; ++j)
- {
- ((uint8_t *)ldata)[NUM_SIZE_8 - j - 1]
- = (ctx->h[j >> 3] >> ((j & 7) << 3)) & 0xFF;
- }
+ for (int i = 1; i < 16; ++i) ((ulong *)b)[i] = 0UL; // zero the remaining 15 words of the final block
+ BlakeCompress((ulong *)h, (ulong *)b, t, 0xFFFFFFFFFFFFFFFFUL); // the only call with a non-zero f (last block)
- //====================================================================//
- // Dump result to global memory -- BIG ENDIAN
- //====================================================================//
+ //====================================================================//
+ // Dump result to global memory -- BIG ENDIAN
+ //====================================================================//
#pragma unroll
- for (int i = 0; i < NUM_SIZE_8; ++i)
- {
- ((uint8_t __global *)hashes)[(tid + 1) * NUM_SIZE_8 - i - 1]
- = ((uint8_t *)ldata)[i];
- }
-
-
- j = ((cl_ulong *)ldata)[3] < Q3
- || ((cl_ulong *)ldata)[3] == Q3 && (
- ((cl_ulong *)ldata)[2] < Q2
- || ((cl_ulong *)ldata)[2] == Q2 && (
- ((cl_ulong *)ldata)[1] < Q1
- || ((cl_ulong *)ldata)[1] == Q1
- && ((cl_ulong *)ldata)[0] < Q0
- )
- );
-
-
-
- #pragma unroll
- for (int i = 0; i < NUM_SIZE_8-1; ++i)
- {
- //((uint8_t global*)hashes)[(tid + 1) * NUM_SIZE_8 - i -1] = ((uint8_t *)ldata)[i];
- ((uint8_t global*)hashes)[tid * NUM_SIZE_8 +i ] = ((uint8_t *)ldata)[i];
- }
- // drop
- ((uint8_t global*)hashes)[tid * NUM_SIZE_8 +31 ] = 0;
+ for (int i = 0; i < 4; ++i) ((__global ulong *)hashes)[(tid + 1) * 4 - i - 1] = as_ulong(as_uchar8(h[i]).s76543210); // h[0..3] byte-reversed; h[0] goes to the highest of the four ulong slots
+ // clear the last byte (offset 31) of this work-item's 32-byte slot
+ ((__global uchar *)hashes)[tid * 32 + 31] = 0;
}
- return;
+ return;
}
diff --git a/Ubuntu/config.json b/Ubuntu/config.json
deleted file mode 100755
index 79743c2..0000000
--- a/Ubuntu/config.json
+++ /dev/null
@@ -1 +0,0 @@
-{ "node": "http://37.156.20.158:3028" }
diff --git a/clMining.cpp b/clMining.cpp
index 8c88330..2c4ccbc 100755
--- a/clMining.cpp
+++ b/clMining.cpp
@@ -4,7 +4,7 @@ MiningClass::MiningClass(CLWarpper *cll)
{
cl = cll;
- const string buildOptions = " -cl-std=CL2 -I .";
+ const string buildOptions = " -w -I .";
program = cl->buildProgramFromFile("MiningKernel.cl", buildOptions);
int tr;
diff --git a/clPreHash.cpp b/clPreHash.cpp
index 7d1d5aa..1fa4532 100755
--- a/clPreHash.cpp
+++ b/clPreHash.cpp
@@ -3,7 +3,7 @@ PreHashClass::PreHashClass(CLWarpper *cll)
{
cl = cll;
- const string buildOptions = " -w -I .";
+ const string buildOptions = " -w -I .";
program = cl->buildProgramFromFile("PreHashKernel.cl", buildOptions);
}
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..2625f2f
--- /dev/null
+++ b/config.json
@@ -0,0 +1 @@
+{ "node": "http://127.0.0.1:3000" }
diff --git a/definitions.h b/definitions.h
index 97161e4..c065a5c 100755
--- a/definitions.h
+++ b/definitions.h
@@ -43,6 +43,7 @@ struct info_t
int keepPrehash;
char to[MAX_URL_SIZE];
char endJob[MAX_URL_SIZE];
+ bool doJob;
uint8_t Hblock[HEIGHT_SIZE];
char stratumMode;
diff --git a/ergoAutolykos.cpp b/ergoAutolykos.cpp
index b2818f1..60639eb 100755
--- a/ergoAutolykos.cpp
+++ b/ergoAutolykos.cpp
@@ -180,6 +180,7 @@ void ergoAutolykos::MinerThread(CLWarpper *clw, const int deviceId, const int t
cl_ulong base = 0;
cl_ulong EndNonce = 0;
cl_uint height = 0;
+
PreHashClass *ph = new PreHashClass(clw);
MiningClass *min = new MiningClass(clw);
@@ -224,6 +225,10 @@ void ergoAutolykos::MinerThread(CLWarpper *clw, const int deviceId, const int t
state = STATE_CONTINUE;
}
+ while (!info->doJob) // NOTE(review): empty spin loop; doJob is declared as a plain bool in info_t — if another thread writes it, this read is unsynchronized. Confirm, and consider std::atomic<bool> plus a sleep.
+ {
+ //LOG(INFO) << "GPU " << deviceId << " problem in proxy ";
+ }
uint_t controlId = info->blockId.load();
@@ -271,9 +276,18 @@ void ergoAutolykos::MinerThread(CLWarpper *clw, const int deviceId, const int t
ret = clw->CopyBuffer(data_d, hdata_d, (NUM_SIZE_8 ) * sizeof(char), false);
+ ch::milliseconds startP = ch::duration_cast(
+ ch::system_clock::now().time_since_epoch()
+ );
+
//LOG(INFO) << "Starting prehashing with new block data";
ph->Prehash(height, hashes_d);
+ ch::milliseconds ms = ch::milliseconds::zero();
+ ms = ch::duration_cast(
+ ch::system_clock::now().time_since_epoch()
+ ) - startP;
+ LOG(INFO) << "Prehash time: " << ms.count() << " ms";
//LOG(INFO) << "Starting InitMining";
//min->InitMining(&ctx_h, (cl_uint*)mes_h, NUM_SIZE_8);
diff --git a/request.cc b/request.cc
index 93b1546..72c0aab 100755
--- a/request.cc
+++ b/request.cc
@@ -85,7 +85,7 @@ void CurlLogError(CURLcode curl_status)
// moved to separate function for tests
///////////////////////////////////////////////////////////////////////////////
-int ParseRequest(json_t * oldreq, json_t * newreq, info_t *info, int checkPubKey)
+int ParseRequest(json_t * oldreq, json_t * newreq, info_t *info, int checkPubKey, long http_code)
{
//LOG(INFO) << "Current block candidate: " << newreq->ptr;
jsmn_parser parser;
@@ -154,12 +154,21 @@ int ParseRequest(json_t * oldreq, json_t * newreq, info_t *info, int checkPubKey
//(HPos == -1) ? info->AlgVer = 1 : info->AlgVer = 2;
if ( BoundPos < 0 || MesPos < 0 || HPos < 0 )
{
- LOG(ERROR) << "Some of expected fields not present in /block/candidate";
LOG(ERROR) << "Block data: " << newreq->ptr;
+ if (BoundPos < 0 && MesPos < 0 && HPos < 0 && http_code == 200)
+ {
+ LOG(ERROR) << "problem in proxy connection";
+ info->doJob = false;
+
+ }
+ else
+ {
+ LOG(ERROR) << "Some of expected fields not present in /block/candidate";
+ }
return EXIT_FAILURE;
}
-
+ info->doJob = true;
if (checkPubKey)
{
if (newreq->GetTokenLen(PkPos) != PK_SIZE_4)
@@ -416,6 +425,8 @@ int GetLatestBlock(
CurlLogError(curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10L));
CurlLogError(curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30L));
curlError = curl_easy_perform(curl);
+ long http_code = 0;
+ curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
CurlLogError(curlError);
curl_easy_cleanup(curl);
@@ -426,7 +437,7 @@ int GetLatestBlock(
{
int oldId = info->blockId.load();
- if (ParseRequest(oldreq, &newreq, info, checkPubKey) != EXIT_SUCCESS)
+ if (ParseRequest(oldreq, &newreq, info, checkPubKey,http_code) != EXIT_SUCCESS)
{
return EXIT_FAILURE;
}
diff --git a/request.h b/request.h
index de61a27..63691a6 100755
--- a/request.h
+++ b/request.h
@@ -32,7 +32,8 @@ int ParseRequest(
json_t * oldreq ,
json_t * newreq,
info_t *info,
- int checkPubKey
+ int checkPubKey,
+ long http_code
);
// CURL http GET request
diff --git a/win64/ErgoOpenCL.exe b/win64/ErgoOpenCL.exe
index dd3a935..690959b 100755
Binary files a/win64/ErgoOpenCL.exe and b/win64/ErgoOpenCL.exe differ
diff --git a/win64/MiningKernel.cl b/win64/MiningKernel.cl
index b41ed47..2d14195 100755
--- a/win64/MiningKernel.cl
+++ b/win64/MiningKernel.cl
@@ -68,10 +68,10 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
((cl_ulong *)(aux))[14] = ~((cl_ulong *)(aux))[14];
- ((cl_ulong *)(aux))[16] = ((cl_ulong *)data)[0];
- ((cl_ulong *)(aux))[17] = ((cl_ulong *)data)[1];
- ((cl_ulong *)(aux))[18] = ((cl_ulong *)data)[2];
- ((cl_ulong *)(aux))[19] = ((cl_ulong *)data)[3];
+ ((cl_ulong *)(aux))[16] = ((global cl_ulong *)data)[0];
+ ((cl_ulong *)(aux))[17] = ((global cl_ulong *)data)[1];
+ ((cl_ulong *)(aux))[18] = ((global cl_ulong *)data)[2];
+ ((cl_ulong *)(aux))[19] = ((global cl_ulong *)data)[3];
((cl_ulong *)(aux))[20] = tmp;
((cl_ulong *)(aux))[21] = 0;
((cl_ulong *)(aux))[22] = 0;
@@ -114,10 +114,12 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
h3 = h2 % N_LEN;
//--------------------------read hash from lookup
+ cl_uint tmpL;
#pragma unroll 8
for (int i = 0; i < 8; ++i)
{
- reverseBytesInt(hashes[(h3 << 3) + i], r[7 - i]);
+ tmpL = hashes[(h3 << 3) + i];
+ reverseBytesInt(tmpL, r[7 - i]);
}
//------------------------------------------------------
@@ -136,7 +138,7 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
bT[j] = ((uint8_t *)r)[j + 1];
#pragma unroll
for (j = 31; j < 63; ++j)
- bT[j] = ((uint8_t *)data)[j - 31];
+ bT[j] = ((global uint8_t *)data)[j - 31];
#pragma unroll
for (j = 63; j < 71; ++j)
bT[j] = ((uint8_t *)&tmp)[j - 63];
diff --git a/win64/PreHashKernel.cl b/win64/PreHashKernel.cl
index e57b5bb..2ccd1a4 100755
--- a/win64/PreHashKernel.cl
+++ b/win64/PreHashKernel.cl
@@ -1,155 +1,174 @@
#include "OCLdecs.h"////problem with relative path
+// BLAKE2b message-word permutation table (sigma of RFC 7693), read as blake2b_sigma[round][i] by the G macro; rows 10 and 11 repeat rows 0 and 1.
+__constant uint8_t blake2b_sigma[12][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
+};
+// Enable the AMD media-ops extensions; amd_bitalign, used by the rotate helpers below, is provided by cl_amd_media_ops.
+#pragma OPENCL EXTENSION cl_amd_media_ops : enable
+#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
+// 64-bit rotate-left helpers built on amd_bitalign: _LO looks intended for 0 < y < 32 and _HI for 32 <= y < 64 — TODO confirm. NOTE(review): neither is called in the code visible here.
+ulong FAST_ROTL64_LO(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x, x.s10, 32 - y))); }
+ulong FAST_ROTL64_HI(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x.s10, x, 32 - (y - 32)))); }
+
+// Rotates the 64-bit value x2 right by y bits using amd_bitalign on its two 32-bit halves; the commented-out macro below is the plain-shift form it stands in for.
+//#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
+ulong rotr64(const ulong x2, const uint y)
+{
+ uint2 x = as_uint2(x2); // x.s0 and x.s1 are the two 32-bit halves of x2
+ if (y < 32) return(as_ulong(amd_bitalign(x.s10, x, y))); // shift amount used directly
+ else return(as_ulong(amd_bitalign(x, x.s10, (y - 32)))); // halves swapped, shift amount reduced by 32
+}
+// BLAKE2b G mixing step on (a, b, c, d): adds message words m[sigma[r][i]] and m[sigma[r][i + 1]] and rotates by 32, 24, 16 and 63. Arguments are expanded several times, so pass side-effect-free expressions.
+#define G(m, r, i, a, b, c, d) do { \
+ a += b + ((ulong *)m)[blake2b_sigma[r][i]]; \
+ d = rotr64(d ^ a, 32);\
+ c += d; \
+ b = rotr64(b ^ c, 24); \
+ a += b + ((ulong *)m)[blake2b_sigma[r][i + 1]]; \
+ d = rotr64(d ^ a, 16); \
+ c += d; \
+ b = rotr64(b ^ c, 63); \
+} while(0)
+
+// One BLAKE2b round r over the 16-word state v: four G steps on the index sets {0,4,8,12} .. {3,7,11,15}, then four on {0,5,10,15} .. {3,4,9,14}.
+#define BLAKE2B_RND(v, r, m) do { \
+ G(m, r, 0, v[ 0], v[ 4], v[ 8], v[12]); \
+ G(m, r, 2, v[ 1], v[ 5], v[ 9], v[13]); \
+ G(m, r, 4, v[ 2], v[ 6], v[10], v[14]); \
+ G(m, r, 6, v[ 3], v[ 7], v[11], v[15]); \
+ G(m, r, 8, v[ 0], v[ 5], v[10], v[15]); \
+ G(m, r, 10, v[ 1], v[ 6], v[11], v[12]); \
+ G(m, r, 12, v[ 2], v[ 7], v[ 8], v[13]); \
+ G(m, r, 14, v[ 3], v[ 4], v[ 9], v[14]); \
+} while(0)
+
+// BLAKE2b compression: folds the 16-word message block m into the 8-word state h. t is XORed into v[12] and f into v[14] before the 12 rounds. NOTE(review): the ulong8 casts assume h and v are suitably aligned — confirm.
+inline void BlakeCompress(ulong *h, const ulong *m, ulong t, ulong f)
+{
+ ulong v[16];
+
+ ((ulong8 *)v)[0] = ((ulong8 *)h)[0]; // v[0..7] = h[0..7]
+ ((ulong8 *)v)[1] = (ulong8)(0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL); // v[8..15] = the BLAKE2b IV words
+
+ v[12] ^= t; // only one 64-bit counter word is mixed in; v[13] is left untouched
+ v[14] ^= f; // callers in this file pass 0 or all-ones
+
+#pragma unroll
+ for (int rnd = 0; rnd < 12; ++rnd)
+ {
+ BLAKE2B_RND(v, rnd, m);
+ }
+
+ h[0] ^= v[0] ^ v[0 + 8]; // feed-forward: h[i] ^= v[i] ^ v[i + 8] for i = 0..7
+ h[1] ^= v[1] ^ v[1 + 8];
+ h[2] ^= v[2] ^ v[2 + 8];
+ h[3] ^= v[3] ^ v[3 + 8];
+ h[4] ^= v[4] ^ v[4 + 8];
+ h[5] ^= v[5] ^ v[5 + 8];
+ h[6] ^= v[6] ^ v[6 + 8];
+ h[7] ^= v[7] ^ v[7 + 8];
+}
+
+
////////////////////////////////////////////////////////////////////////////////
// First iteration of hashes precalculation
////////////////////////////////////////////////////////////////////////////////
__kernel void InitPrehash(
- // data: height
- const cl_uint h,
- // hashes
- global cl_uint * hashes
+ // data: height
+ const cl_uint h,
+ // hashes: output buffer, one 32-byte slot per tid
+ global cl_uint * hashes
)
{
- cl_uint tid =get_local_id(0);
+ cl_uint tid = get_local_id(0); // overwritten below with get_global_id(0)
- // shared memory
- cl_uint height = h;
+ // per-work-item copy of the height argument (the name h is redeclared as an array inside the if-block)
+ cl_uint height = h;
+ tid = get_global_id(0);
+ if (tid < N_LEN) // work-items with tid >= N_LEN do nothing
+ {
+ ulong h[8]; // hash state; shadows the kernel parameter h
+ ulong b[16]; // one 128-byte message block
+ ulong t = 0; // running byte counter handed to BlakeCompress
+ //====================================================================//
+ // Initialize context
+ //====================================================================//
+ ((ulong8 *)h)[0] = (ulong8)(0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL); // same eight constants BlakeCompress loads into v[8..15]
- tid = get_global_id(0);
+ h[0] ^= 0x01010020; // NOTE(review): presumably the BLAKE2b parameter word for a 32-byte digest (0x20) -- confirm
- if (tid < N_LEN)
- {
+ //====================================================================//
+ // Hash tid
+ //====================================================================//
- cl_uint j;
+ ((uint *)b)[0] = as_uint(as_uchar4(tid).s3210); // tid with its four bytes reversed, in bytes 0..3 of the block
+ //====================================================================//
+ // Hash height
+ //====================================================================//
+ ((uint *)b)[1] = height; // height stored without byte reversal, in bytes 4..7 of the block
- // local memory
- // 472 bytes
- cl_uint ldata[118];
+ //====================================================================//
+ // Hash constant message
+ //====================================================================//
- // 32 * 64 bits = 256 bytes
- cl_ulong * aux = (cl_ulong *)ldata;
- // (212 + 4) bytes
- ctx_t * ctx = (ctx_t *)(ldata + 64);
+ ulong ctr = 0; // the constant message is a sequence of 64-bit counters starting at 0
+ for (int x = 1; x < 16; ++x, ++ctr) // words 1..15 of the first block get counters 0..14
+ {
+ ((ulong *)b)[x] = as_ulong(as_uchar8(ctr).s76543210); // counter stored with its eight bytes reversed
+ }
- //====================================================================//
- // Initialize context
- //====================================================================//
- //memset(ctx->b, 0, BUF_SIZE_8);
- #pragma unroll
- for (int am = 0; am < BUF_SIZE_8; am++)
+#pragma unroll 1
+ for (int z = 0; z < 63; ++z) // 63 iterations: compress the current block, then refill all 16 words with the next counters
+ {
+ t += 128; // each full block adds 128 bytes to the counter
+ BlakeCompress((ulong *)h, (ulong *)b, t, 0UL);
+
+#pragma unroll
+ for (int x = 0; x < 16; ++x, ++ctr)
{
- ctx->b[am] = 0;
+ ((ulong *)b)[x] = as_ulong(as_uchar8(ctr).s76543210);
}
- B2B_IV(ctx->h);
-
+ }
+ t += 128;
+ BlakeCompress((ulong *)h, (ulong *)b, t, 0UL); // 64th compression of a full 128-byte block
- ctx->h[0] ^= 0x01010000 ^ NUM_SIZE_8;
- //memset(ctx->t, 0, 16);
- ctx->t[0] = 0;
- ctx->t[1] = 0;
- ctx->c = 0;
+ ((ulong *)b)[0] = as_ulong(as_uchar8(ctr).s76543210); // the next counter value goes into word 0 of the last block
+ t += 8; // the last block contributes only 8 message bytes
-
- //====================================================================//
- // Hash tid
- //====================================================================//
-#pragma unroll
- for (j = 0; ctx->c < BUF_SIZE_8 && j < INDEX_SIZE_8; ++j)
- {
- ctx->b[ctx->c++] = ((const uint8_t *)&tid)[INDEX_SIZE_8 - j - 1];
- }
-
- //====================================================================//
- // Hash height
- //====================================================================//
-#pragma unroll
- for (j = 0; ctx->c < BUF_SIZE_8 && j < HEIGHT_SIZE ; ++j)
- {
- ctx->b[ctx->c++] = ((const uint8_t *)&height)[j/*HEIGHT_SIZE - j - 1*/];
- }
-
- //====================================================================//
- // Hash constant message
- //====================================================================//
#pragma unroll
- for (j = 0; ctx->c < BUF_SIZE_8 && j < CONST_MES_SIZE_8; ++j)
- {
- ctx->b[ctx->c++]
- = (
- !((7 - (j & 7)) >> 1)
- * ((j >> 3) >> (((~(j & 7)) & 1) << 3))
- ) & 0xFF;
- }
-
-
- while (j < CONST_MES_SIZE_8)
- {
- HOST_B2B_H(ctx, aux);
-
- for ( ; ctx->c < BUF_SIZE_8 && j < CONST_MES_SIZE_8; ++j)
- {
- ctx->b[ctx->c++]
- = (
- !((7 - (j & 7)) >> 1)
- * ((j >> 3) >> (((~(j & 7)) & 1) << 3))
- ) & 0xFF;
- }
- }
-
-
- //====================================================================//
- // Finalize hash
- //====================================================================//
- HOST_B2B_H_LAST(ctx, aux);
-#pragma unroll
- for (j = 0; j < NUM_SIZE_8; ++j)
- {
- ((uint8_t *)ldata)[NUM_SIZE_8 - j - 1]
- = (ctx->h[j >> 3] >> ((j & 7) << 3)) & 0xFF;
- }
+ for (int i = 1; i < 16; ++i) ((ulong *)b)[i] = 0UL; // remaining 15 words of the last block are zeroed
+ BlakeCompress((ulong *)h, (ulong *)b, t, 0xFFFFFFFFFFFFFFFFUL); // last compression, called with f = all ones
- //====================================================================//
- // Dump result to global memory -- BIG ENDIAN
- //====================================================================//
+ //====================================================================//
+ // Dump result to global memory -- BIG ENDIAN
+ //====================================================================//
#pragma unroll
- for (int i = 0; i < NUM_SIZE_8; ++i)
- {
- ((uint8_t __global *)hashes)[(tid + 1) * NUM_SIZE_8 - i - 1]
- = ((uint8_t *)ldata)[i];
- }
-
-
- j = ((cl_ulong *)ldata)[3] < Q3
- || ((cl_ulong *)ldata)[3] == Q3 && (
- ((cl_ulong *)ldata)[2] < Q2
- || ((cl_ulong *)ldata)[2] == Q2 && (
- ((cl_ulong *)ldata)[1] < Q1
- || ((cl_ulong *)ldata)[1] == Q1
- && ((cl_ulong *)ldata)[0] < Q0
- )
- );
-
-
-
- #pragma unroll
- for (int i = 0; i < NUM_SIZE_8-1; ++i)
- {
- //((uint8_t global*)hashes)[(tid + 1) * NUM_SIZE_8 - i -1] = ((uint8_t *)ldata)[i];
- ((uint8_t global*)hashes)[tid * NUM_SIZE_8 +i ] = ((uint8_t *)ldata)[i];
- }
- // drop
- ((uint8_t global*)hashes)[tid * NUM_SIZE_8 +31 ] = 0;
+ for (int i = 0; i < 4; ++i) ((__global ulong *)hashes)[(tid + 1) * 4 - i - 1] = as_ulong(as_uchar8(h[i]).s76543210); // h[0..3], each byte-reversed, written in reverse word order into ulong words tid*4 .. tid*4+3
+
+ ((__global uchar *)hashes)[tid * 32 + 31] = 0; // then the byte at offset tid*32 + 31 is forced to zero
}
- return;
+ return;
}
diff --git a/win64/config.json b/win64/config.json
deleted file mode 100755
index a64edc7..0000000
--- a/win64/config.json
+++ /dev/null
@@ -1 +0,0 @@
-{ "node": "http://88.198.13.202:9052" }
diff --git a/win64/libcurl.dll b/win64/libcurl.dll
new file mode 100644
index 0000000..9452a8f
Binary files /dev/null and b/win64/libcurl.dll differ