From 19c9c9b814ccf3b1a14ff3f47238d327214a073f Mon Sep 17 00:00:00 2001 From: Ryo Date: Thu, 21 Feb 2019 23:40:04 +0300 Subject: [PATCH 1/9] Edit pools.txt comments Set default verbose to 4 users are used to seing hashrate by default. Set it to 30 sec. edit config.tpl typos --- xmrstak/backend/amd/config.tpl | 10 +++++----- xmrstak/config.tpl | 4 ++-- xmrstak/pools.tpl | 6 ++---- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/xmrstak/backend/amd/config.tpl b/xmrstak/backend/amd/config.tpl index 26a8ef48a..d37c623d2 100644 --- a/xmrstak/backend/amd/config.tpl +++ b/xmrstak/backend/amd/config.tpl @@ -6,26 +6,26 @@ R"===(// generated by XMRSTAK_VERSION * intensity - Number of parallel GPU threads (nothing to do with CPU threads) * worksize - Number of local GPU threads (nothing to do with CPU threads) * affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner. - * strided_index - switch memory pattern used for the scratch pad memory + * strided_index - switch memory pattern used for the scratchpad memory * 3 = chunked memory, chunk size based on the 'worksize' * required: intensity must be a multiple of worksize * 2 = chunked memory, chunk size is controlled by 'mem_chunk' * required: intensity must be a multiple of worksize - * 1 or true = use 16byte contiguous memory per thread, the next memory block has offset of intensity blocks + * 1 or true = use 16 byte contiguous memory per thread, the next memory block has offset of intensity blocks * (for cryptonight_v8 and monero it is equal to strided_index = 0) * 0 or false = use a contiguous block of memory per thread * mem_chunk - range 0 to 18: set the number of elements (16byte) per chunk * this value is only used if 'strided_index' == 2 - * element count is computed with the equation: 2 to the power of 'mem_chunk' e.g. 4 means a chunk of 16 elements(256byte) + * element count is computed with the equation: 2 to the power of 'mem_chunk' e.g. 4 means a chunk of 16 elements(256 byte) * unroll - allow to control how often the POW main loop is unrolled; valid range [1;128) - for most OpenCL implementations it must be a power of two. * comp_mode - Compatibility enable/disable the automatic guard around compute kernel which allows - * to use a intensity which is not the multiple of the worksize. + * to use an intensity which is not the multiple of the worksize. * If you set false and the intensity is not multiple of the worksize the miner can crash: * in this case set the intensity to a multiple of the worksize or activate comp_mode. * interleave - Controls the starting point in time between two threads on the same GPU device relative to the last started thread. * This option has only an effect if two compute threads using the same GPU device: valid range [0;100] * 0 = disable thread interleaving - * 40 = each working thread waits until 40% of the hash calculation of the previous started thread is finished + * 40 = each working thread waits until 40% of the hash calculation of the previously started thread is finished * "gpu_threads_conf" : * [ * { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, diff --git a/xmrstak/config.tpl b/xmrstak/config.tpl index 73ae054c2..19ffc3541 100644 --- a/xmrstak/config.tpl +++ b/xmrstak/config.tpl @@ -33,7 +33,7 @@ R"===(// generated by XMRSTAK_VERSION * * print_motd - Display messages from your pool operator in the hashrate result. */ -"verbose_level" : 3, +"verbose_level" : 4, "print_motd" : true, /* @@ -42,7 +42,7 @@ R"===(// generated by XMRSTAK_VERSION * h_print_time - How often, in seconds, should we print a hashrate report if verbose_level is set to 4. * This option has no effect if verbose_level is not 4. */ -"h_print_time" : 60, +"h_print_time" : 30, /* * Manual hardware AES override diff --git a/xmrstak/pools.tpl b/xmrstak/pools.tpl index f8f1d7d6c..a5b754f55 100644 --- a/xmrstak/pools.tpl +++ b/xmrstak/pools.tpl @@ -1,7 +1,7 @@ R"===(// generated by XMRSTAK_VERSION /* - * pool_address - Pool address should be in the form "pool.supportxmr.com:3333". Only stratum pools are supported. + * pool_address - Pool address should be entered as "pool_address:port" (e.g "pool.ryo-currency.com:4444"). Only stratum pools are supported. * wallet_address - Your wallet, or pool login. * rig_id - Rig identifier for pool-side statistics (needs pool support). * pool_password - Can be empty in most cases or "x". @@ -10,8 +10,6 @@ R"===(// generated by XMRSTAK_VERSION * tls_fingerprint - Server's SHA256 fingerprint. If this string is non-empty then we will check the server's cert against it. * pool_weight - Pool weight is a number telling the miner how important the pool is. Miner will mine mostly at the pool * with the highest weight, unless the pool fails. Weight must be an integer larger than 0. - * - * We feature pools up to 1MH/s. For a more complete list see M5M400's pool list at www.moneropools.com */ "pool_list" : @@ -35,7 +33,7 @@ POOLCONF], * turtlecoin * plenteum * - * Native algorithms which not depends on any block versions: + * Native algorithms which do not depend on any block versions: * * # 256KiB scratchpad memory * cryptonight_turtle From 1335525835c6331dc8411d43652a76054b1339b7 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Mon, 4 Mar 2019 08:48:00 +0100 Subject: [PATCH 2/9] OpenCl: fix multi gpu support Since #2268 the multi gpu support is broken. It is not possible to have more than one opencl context in a process. Use one OpenCl context for the process instead per GPU. --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 34 +++++++++++------------------ 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index a2cbe8f54..1b016e84f 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -774,7 +774,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) // Same as the platform index sanity check, except we must check all requested device indexes for(int i = 0; i < num_gpus; ++i) { - if(entries <= ctx[i].deviceIdx) + if(ctx[i].deviceIdx >= entries) { printer::inst()->print_msg(L1,"Selected OpenCL device index %lu doesn't exist.\n", ctx[i].deviceIdx); return ERR_STUPID_PARAMS; @@ -793,17 +793,22 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) } // Indexes sanity checked above -#ifdef __GNUC__ - cl_device_id TempDeviceList[num_gpus]; -#else - cl_device_id* TempDeviceList = (cl_device_id*)_alloca(entries * sizeof(cl_device_id)); -#endif + std::vector TempDeviceList(num_gpus, nullptr); + + printer::inst()->print_msg(LDEBUG, "Number of OpenCL GPUs %d", entries); for(int i = 0; i < num_gpus; ++i) { ctx[i].DeviceID = DeviceIDList[ctx[i].deviceIdx]; TempDeviceList[i] = DeviceIDList[ctx[i].deviceIdx]; } + cl_context opencl_ctx = clCreateContext(NULL, num_gpus, TempDeviceList.data(), NULL, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateContext.", err_to_str(ret)); + return ERR_OCL_API; + } + const char *fastIntMathV2CL = #include "./opencl/fast_int_math_v2.cl" ; @@ -847,22 +852,9 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) std::vector> interleaveData(num_gpus, nullptr); - std::vector context_vec(entries, nullptr); - for(int i = 0; i < num_gpus; ++i) - { - if(context_vec[ctx[i].deviceIdx] == nullptr) - { - context_vec[ctx[i].deviceIdx] = clCreateContext(NULL, 1, &(ctx[i].DeviceID), NULL, NULL, &ret); - if(ret != CL_SUCCESS) - { - printer::inst()->print_msg(L1,"Error %s when calling clCreateContext.", err_to_str(ret)); - return ERR_OCL_API; - } - } - } - for(int i = 0; i < num_gpus; ++i) { + printer::inst()->print_msg(LDEBUG,"OpenCL Init device %d", ctx[i].deviceIdx); const size_t devIdx = ctx[i].deviceIdx; if(interleaveData.size() <= devIdx) { @@ -879,7 +871,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) ctx[i].interleaveData = interleaveData[devIdx]; ctx[i].interleaveData->adjustThreshold = static_cast(ctx[i].interleave)/100.0; ctx[i].interleaveData->startAdjustThreshold = ctx[i].interleaveData->adjustThreshold; - ctx[i].opencl_ctx = context_vec[ctx[i].deviceIdx]; + ctx[i].opencl_ctx = opencl_ctx; if((ret = InitOpenCLGpu(ctx->opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS) { From 70aea9c7cbacf087c73bda90e771334c69b17209 Mon Sep 17 00:00:00 2001 From: Tony Butler Date: Mon, 4 Mar 2019 11:09:52 -0700 Subject: [PATCH 3/9] CN_GPU: Fix old compilers and old kernels (Clang 3.5 tested, without CONFIG_HUGETLBFS) --- xmrstak/backend/cpu/crypto/cn_gpu_avx.cpp | 6 ++++++ xmrstak/backend/cpu/crypto/cryptonight_common.cpp | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/xmrstak/backend/cpu/crypto/cn_gpu_avx.cpp b/xmrstak/backend/cpu/crypto/cn_gpu_avx.cpp index 8b4aefe13..79b38373a 100644 --- a/xmrstak/backend/cpu/crypto/cn_gpu_avx.cpp +++ b/xmrstak/backend/cpu/crypto/cn_gpu_avx.cpp @@ -2,6 +2,12 @@ #include "../../cryptonight.hpp" #pragma GCC target ("avx2") +#ifndef _mm256_bslli_epi128 + #define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count)) +#endif +#ifndef _mm256_bsrli_epi128 + #define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count)) +#endif inline void prep_dv_avx(__m256i* idx, __m256i& v, __m256& n01) { diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp index a065abe01..a9d1c96fd 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp +++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp @@ -261,6 +261,13 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al #else ptr->long_state = (uint8_t*)mmap(NULL, hashMemSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0); + if (ptr->long_state == MAP_FAILED) + { + // try without MAP_HUGETLB for crappy kernels + msg->warning = "mmap with HUGETLB failed, attempting without it (you should fix your kernel)"; + ptr->long_state = (uint8_t*)mmap(NULL, hashMemSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0); + } #endif if (ptr->long_state == MAP_FAILED) From 746cf7fe19c049ffd13b5f9007b487938388da6c Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Wed, 6 Mar 2019 21:58:14 +0100 Subject: [PATCH 4/9] fix wrong logic operator usage fix #2276 --- xmrstak/backend/cpu/crypto/cryptonight_aesni.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 43f719873..f9cefed0c 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -798,7 +798,7 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) ah0 += lo; \ al0 += hi; \ } \ - if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_r || ALGO != cryptonight_r_wow) \ + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_r || ALGO == cryptonight_r_wow) \ { \ bx1 = bx0; \ bx0 = cx; \ From 03a0ca278a43b737486f1d3cb874a3dac10250c5 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Wed, 6 Mar 2019 21:50:43 +0100 Subject: [PATCH 5/9] assembly patching - add cryptonight_r assembly - fix cryptobight_v8 assembly --- CMakeLists.txt | 5 + xmrstak/backend/amd/OclCryptonightR_gen.cpp | 2 +- xmrstak/backend/amd/minethd.cpp | 9 +- .../backend/cpu/crypto/CryptonightR_gen.cpp | 107 ++ .../crypto/asm/cnR/CryptonightR_template.S | 1593 +++++++++++++++++ .../crypto/asm/cnR/CryptonightR_template.asm | 1583 ++++++++++++++++ .../crypto/asm/cnR/CryptonightR_template.h | 1063 +++++++++++ .../crypto/asm/cnR/CryptonightR_template.inc | 529 ++++++ .../asm/cnR/CryptonightR_template_win.inc | 529 ++++++ .../asm/cnR/CryptonightWOW_template.inc | 486 +++++ .../asm/cnR/CryptonightWOW_template_win.inc | 486 +++++ ..._v8_double_main_loop_sandybridge_linux.inc | 4 +- ..._v8_double_main_loop_sandybridge_win64.inc | 2 +- .../cpu/crypto/asm/cryptonight_v8_main_loop.S | 15 +- .../crypto/asm/cryptonight_v8_main_loop.asm | 11 +- ...yptonight_v8_main_loop_ivybridge_linux.inc | 2 +- ...yptonight_v8_main_loop_ivybridge_win64.inc | 2 +- .../cryptonight_v8_main_loop_ryzen_linux.inc | 2 +- .../cryptonight_v8_main_loop_ryzen_win64.inc | 2 +- xmrstak/backend/cpu/crypto/cryptonight.h | 26 +- .../backend/cpu/crypto/cryptonight_aesni.h | 206 ++- .../backend/cpu/crypto/variant4_random_math.h | 2 + xmrstak/backend/cpu/minethd.cpp | 229 +-- xmrstak/backend/cpu/minethd.hpp | 7 +- xmrstak/backend/cryptonight.hpp | 1 + xmrstak/backend/nvidia/minethd.cpp | 32 +- 26 files changed, 6760 insertions(+), 175 deletions(-) create mode 100644 xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp create mode 100644 xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S create mode 100644 xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.asm create mode 100644 xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.h create mode 100644 xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.inc create mode 100644 xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template_win.inc create mode 100644 xmrstak/backend/cpu/crypto/asm/cnR/CryptonightWOW_template.inc create mode 100644 xmrstak/backend/cpu/crypto/asm/cnR/CryptonightWOW_template_win.inc diff --git a/CMakeLists.txt b/CMakeLists.txt index a5c06df8a..004b5555c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -508,18 +508,23 @@ if(CMAKE_C_COMPILER_ID MATCHES "MSVC") # asm optimized monero v8 code enable_language(ASM_MASM) set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm" PROPERTY ASM_MASM) + set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.asm" PROPERTY ASM_MASM) add_library(xmr-stak-asm STATIC "xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm" + "xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.asm" ) else() # asm optimized monero v8 code enable_language(ASM) set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S" PROPERTY CPP) + set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S" PROPERTY CPP) set_source_files_properties("xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S" PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp") + set_source_files_properties("xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S" PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp") add_library(xmr-stak-asm STATIC "xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S" + "xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S" ) endif() diff --git a/xmrstak/backend/amd/OclCryptonightR_gen.cpp b/xmrstak/backend/amd/OclCryptonightR_gen.cpp index 4aabe51d0..13785d64b 100644 --- a/xmrstak/backend/amd/OclCryptonightR_gen.cpp +++ b/xmrstak/backend/amd/OclCryptonightR_gen.cpp @@ -282,7 +282,7 @@ cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t code_size = v4_random_math_init(code, height); break; default: - printer::inst()->print_msg(LDEBUG, "CryptonightR_get_program: invalid algo %d", algo); + printer::inst()->print_msg(L0, "CryptonightR_get_program: invalid algo %d", algo); return nullptr; } diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index eb0009413..09e030e66 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -186,8 +186,7 @@ void minethd::work_main() cpu::minethd::cn_on_new_job set_job; - cn_hash_fun hash_fun; - cpu::minethd::func_multi_selector<1>(hash_fun, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); + cpu::minethd::func_multi_selector<1>(&cpu_ctx, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); uint8_t version = 0; size_t lastPoolId = 0; @@ -228,12 +227,12 @@ void minethd::work_main() if(new_version >= coinDesc.GetMiningForkVersion()) { miner_algo = coinDesc.GetMiningAlgo(); - cpu::minethd::func_multi_selector<1>(hash_fun, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); + cpu::minethd::func_multi_selector<1>(&cpu_ctx, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); } else { miner_algo = coinDesc.GetMiningAlgoRoot(); - cpu::minethd::func_multi_selector<1>(hash_fun, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); + cpu::minethd::func_multi_selector<1>(&cpu_ctx, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); } lastPoolId = oWork.iPoolId; version = new_version; @@ -282,7 +281,7 @@ void minethd::work_main() *(uint32_t*)(bWorkBlob + 39) = results[i]; - hash_fun(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx, miner_algo); + cpu_ctx->hash_fn(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx, miner_algo); if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget) executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult, iThreadNo, miner_algo), oWork.iPoolId)); else diff --git a/xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp b/xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp new file mode 100644 index 000000000..a289ac559 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp @@ -0,0 +1,107 @@ +#include + +typedef void(*void_func)(); + +#include "xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.h" +#include "cryptonight_aesni.h" +#include "cryptonight.h" +#include "xmrstak/misc/console.hpp" + +static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)()) +{ + const ptrdiff_t size = reinterpret_cast(p2) - reinterpret_cast(p1); + if (size > 0) { + memcpy(p, reinterpret_cast(p1), size); + p += size; + } +} + +static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, int selected_asm) +{ + uint32_t prev_rot_src = (uint32_t)(-1); + + for (int i = 0;; ++i) { + const V4_Instruction inst = code[i]; + if (inst.opcode == RET) { + break; + } + + uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2); + uint8_t dst_index = inst.dst_index; + uint8_t src_index = inst.src_index; + + const uint32_t a = inst.dst_index; + const uint32_t b = inst.src_index; + const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS)); + + switch (inst.opcode) { + case ROR: + case ROL: + if (b != prev_rot_src) { + prev_rot_src = b; + add_code(p, instructions_mov[c], instructions_mov[c + 1]); + } + break; + } + + if (a == prev_rot_src) { + prev_rot_src = (uint32_t)(-1); + } + + void_func begin = instructions[c]; + + // AMD == 2 + if ((selected_asm == 2) && (inst.opcode == MUL && !is_64_bit)) { + // AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL + // Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41 + uint8_t* prefix = reinterpret_cast(begin); + + if (*prefix == 0x49) { + *(p++) = 0x41; + } + + begin = reinterpret_cast(prefix + 1); + } + + add_code(p, begin, instructions[c + 1]); + + if (inst.opcode == ADD) { + *(uint32_t*)(p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C; + if (is_64_bit) { + prev_rot_src = (uint32_t)(-1); + } + } + } +} + +void v4_compile_code(cryptonight_ctx* ctx, int code_size) +{ + printer::inst()->print_msg(LDEBUG, "CryptonightR update ASM code"); + const int allocation_size = 65536; + + if(ctx->fun_data == nullptr) + ctx->fun_data = static_cast(allocateExecutableMemory(allocation_size)); + else + unprotectExecutableMemory(ctx->fun_data, allocation_size); + + uint8_t* p0 = ctx->fun_data; + uint8_t* p = p0; + if(ctx->fun_data != nullptr) + { + + add_code(p, CryptonightR_template_part1, CryptonightR_template_part2); + add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version); + add_code(p, CryptonightR_template_part2, CryptonightR_template_part3); + *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0)); + add_code(p, CryptonightR_template_part3, CryptonightR_template_end); + + + ctx->loop_fn = reinterpret_cast(ctx->fun_data); + protectExecutableMemory(ctx->fun_data, allocation_size); + flushInstructionCache(ctx->fun_data, p - p0); + } + else + { + printer::inst()->print_msg(L0, "Error: CPU CryptonightR update ASM code ctx->fun_data is a nullptr"); + } +} diff --git a/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S new file mode 100644 index 000000000..5f3046cb9 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S @@ -0,0 +1,1593 @@ +#ifdef __APPLE__ +# define ALIGN(x) .align 6 +#else +# define ALIGN(x) .align 64 +#endif +.intel_syntax noprefix +#ifdef __APPLE__ +# define FN_PREFIX(fn) _ ## fn +.text +#else +# define FN_PREFIX(fn) fn +.section .text +#endif + +#define PUBLIC .global + +PUBLIC FN_PREFIX(CryptonightR_instruction0) +PUBLIC FN_PREFIX(CryptonightR_instruction1) +PUBLIC FN_PREFIX(CryptonightR_instruction2) +PUBLIC FN_PREFIX(CryptonightR_instruction3) +PUBLIC FN_PREFIX(CryptonightR_instruction4) +PUBLIC FN_PREFIX(CryptonightR_instruction5) +PUBLIC FN_PREFIX(CryptonightR_instruction6) +PUBLIC FN_PREFIX(CryptonightR_instruction7) +PUBLIC FN_PREFIX(CryptonightR_instruction8) +PUBLIC FN_PREFIX(CryptonightR_instruction9) +PUBLIC FN_PREFIX(CryptonightR_instruction10) +PUBLIC FN_PREFIX(CryptonightR_instruction11) +PUBLIC FN_PREFIX(CryptonightR_instruction12) +PUBLIC FN_PREFIX(CryptonightR_instruction13) +PUBLIC FN_PREFIX(CryptonightR_instruction14) +PUBLIC FN_PREFIX(CryptonightR_instruction15) +PUBLIC FN_PREFIX(CryptonightR_instruction16) +PUBLIC FN_PREFIX(CryptonightR_instruction17) +PUBLIC FN_PREFIX(CryptonightR_instruction18) +PUBLIC FN_PREFIX(CryptonightR_instruction19) +PUBLIC FN_PREFIX(CryptonightR_instruction20) +PUBLIC FN_PREFIX(CryptonightR_instruction21) +PUBLIC FN_PREFIX(CryptonightR_instruction22) +PUBLIC FN_PREFIX(CryptonightR_instruction23) +PUBLIC FN_PREFIX(CryptonightR_instruction24) +PUBLIC FN_PREFIX(CryptonightR_instruction25) +PUBLIC FN_PREFIX(CryptonightR_instruction26) +PUBLIC FN_PREFIX(CryptonightR_instruction27) +PUBLIC FN_PREFIX(CryptonightR_instruction28) +PUBLIC FN_PREFIX(CryptonightR_instruction29) +PUBLIC FN_PREFIX(CryptonightR_instruction30) +PUBLIC FN_PREFIX(CryptonightR_instruction31) +PUBLIC FN_PREFIX(CryptonightR_instruction32) +PUBLIC FN_PREFIX(CryptonightR_instruction33) +PUBLIC FN_PREFIX(CryptonightR_instruction34) +PUBLIC FN_PREFIX(CryptonightR_instruction35) +PUBLIC FN_PREFIX(CryptonightR_instruction36) +PUBLIC FN_PREFIX(CryptonightR_instruction37) +PUBLIC FN_PREFIX(CryptonightR_instruction38) +PUBLIC FN_PREFIX(CryptonightR_instruction39) +PUBLIC FN_PREFIX(CryptonightR_instruction40) +PUBLIC FN_PREFIX(CryptonightR_instruction41) +PUBLIC FN_PREFIX(CryptonightR_instruction42) +PUBLIC FN_PREFIX(CryptonightR_instruction43) +PUBLIC FN_PREFIX(CryptonightR_instruction44) +PUBLIC FN_PREFIX(CryptonightR_instruction45) +PUBLIC FN_PREFIX(CryptonightR_instruction46) +PUBLIC FN_PREFIX(CryptonightR_instruction47) +PUBLIC FN_PREFIX(CryptonightR_instruction48) +PUBLIC FN_PREFIX(CryptonightR_instruction49) +PUBLIC FN_PREFIX(CryptonightR_instruction50) +PUBLIC FN_PREFIX(CryptonightR_instruction51) +PUBLIC FN_PREFIX(CryptonightR_instruction52) +PUBLIC FN_PREFIX(CryptonightR_instruction53) +PUBLIC FN_PREFIX(CryptonightR_instruction54) +PUBLIC FN_PREFIX(CryptonightR_instruction55) +PUBLIC FN_PREFIX(CryptonightR_instruction56) +PUBLIC FN_PREFIX(CryptonightR_instruction57) +PUBLIC FN_PREFIX(CryptonightR_instruction58) +PUBLIC FN_PREFIX(CryptonightR_instruction59) +PUBLIC FN_PREFIX(CryptonightR_instruction60) +PUBLIC FN_PREFIX(CryptonightR_instruction61) +PUBLIC FN_PREFIX(CryptonightR_instruction62) +PUBLIC FN_PREFIX(CryptonightR_instruction63) +PUBLIC FN_PREFIX(CryptonightR_instruction64) +PUBLIC FN_PREFIX(CryptonightR_instruction65) +PUBLIC FN_PREFIX(CryptonightR_instruction66) +PUBLIC FN_PREFIX(CryptonightR_instruction67) +PUBLIC FN_PREFIX(CryptonightR_instruction68) +PUBLIC FN_PREFIX(CryptonightR_instruction69) +PUBLIC FN_PREFIX(CryptonightR_instruction70) +PUBLIC FN_PREFIX(CryptonightR_instruction71) +PUBLIC FN_PREFIX(CryptonightR_instruction72) +PUBLIC FN_PREFIX(CryptonightR_instruction73) +PUBLIC FN_PREFIX(CryptonightR_instruction74) +PUBLIC FN_PREFIX(CryptonightR_instruction75) +PUBLIC FN_PREFIX(CryptonightR_instruction76) +PUBLIC FN_PREFIX(CryptonightR_instruction77) +PUBLIC FN_PREFIX(CryptonightR_instruction78) +PUBLIC FN_PREFIX(CryptonightR_instruction79) +PUBLIC FN_PREFIX(CryptonightR_instruction80) +PUBLIC FN_PREFIX(CryptonightR_instruction81) +PUBLIC FN_PREFIX(CryptonightR_instruction82) +PUBLIC FN_PREFIX(CryptonightR_instruction83) +PUBLIC FN_PREFIX(CryptonightR_instruction84) +PUBLIC FN_PREFIX(CryptonightR_instruction85) +PUBLIC FN_PREFIX(CryptonightR_instruction86) +PUBLIC FN_PREFIX(CryptonightR_instruction87) +PUBLIC FN_PREFIX(CryptonightR_instruction88) +PUBLIC FN_PREFIX(CryptonightR_instruction89) +PUBLIC FN_PREFIX(CryptonightR_instruction90) +PUBLIC FN_PREFIX(CryptonightR_instruction91) +PUBLIC FN_PREFIX(CryptonightR_instruction92) +PUBLIC FN_PREFIX(CryptonightR_instruction93) +PUBLIC FN_PREFIX(CryptonightR_instruction94) +PUBLIC FN_PREFIX(CryptonightR_instruction95) +PUBLIC FN_PREFIX(CryptonightR_instruction96) +PUBLIC FN_PREFIX(CryptonightR_instruction97) +PUBLIC FN_PREFIX(CryptonightR_instruction98) +PUBLIC FN_PREFIX(CryptonightR_instruction99) +PUBLIC FN_PREFIX(CryptonightR_instruction100) +PUBLIC FN_PREFIX(CryptonightR_instruction101) +PUBLIC FN_PREFIX(CryptonightR_instruction102) +PUBLIC FN_PREFIX(CryptonightR_instruction103) +PUBLIC FN_PREFIX(CryptonightR_instruction104) +PUBLIC FN_PREFIX(CryptonightR_instruction105) +PUBLIC FN_PREFIX(CryptonightR_instruction106) +PUBLIC FN_PREFIX(CryptonightR_instruction107) +PUBLIC FN_PREFIX(CryptonightR_instruction108) +PUBLIC FN_PREFIX(CryptonightR_instruction109) +PUBLIC FN_PREFIX(CryptonightR_instruction110) +PUBLIC FN_PREFIX(CryptonightR_instruction111) +PUBLIC FN_PREFIX(CryptonightR_instruction112) +PUBLIC FN_PREFIX(CryptonightR_instruction113) +PUBLIC FN_PREFIX(CryptonightR_instruction114) +PUBLIC FN_PREFIX(CryptonightR_instruction115) +PUBLIC FN_PREFIX(CryptonightR_instruction116) +PUBLIC FN_PREFIX(CryptonightR_instruction117) +PUBLIC FN_PREFIX(CryptonightR_instruction118) +PUBLIC FN_PREFIX(CryptonightR_instruction119) +PUBLIC FN_PREFIX(CryptonightR_instruction120) +PUBLIC FN_PREFIX(CryptonightR_instruction121) +PUBLIC FN_PREFIX(CryptonightR_instruction122) +PUBLIC FN_PREFIX(CryptonightR_instruction123) +PUBLIC FN_PREFIX(CryptonightR_instruction124) +PUBLIC FN_PREFIX(CryptonightR_instruction125) +PUBLIC FN_PREFIX(CryptonightR_instruction126) +PUBLIC FN_PREFIX(CryptonightR_instruction127) +PUBLIC FN_PREFIX(CryptonightR_instruction128) +PUBLIC FN_PREFIX(CryptonightR_instruction129) +PUBLIC FN_PREFIX(CryptonightR_instruction130) +PUBLIC FN_PREFIX(CryptonightR_instruction131) +PUBLIC FN_PREFIX(CryptonightR_instruction132) +PUBLIC FN_PREFIX(CryptonightR_instruction133) +PUBLIC FN_PREFIX(CryptonightR_instruction134) +PUBLIC FN_PREFIX(CryptonightR_instruction135) +PUBLIC FN_PREFIX(CryptonightR_instruction136) +PUBLIC FN_PREFIX(CryptonightR_instruction137) +PUBLIC FN_PREFIX(CryptonightR_instruction138) +PUBLIC FN_PREFIX(CryptonightR_instruction139) +PUBLIC FN_PREFIX(CryptonightR_instruction140) +PUBLIC FN_PREFIX(CryptonightR_instruction141) +PUBLIC FN_PREFIX(CryptonightR_instruction142) +PUBLIC FN_PREFIX(CryptonightR_instruction143) +PUBLIC FN_PREFIX(CryptonightR_instruction144) +PUBLIC FN_PREFIX(CryptonightR_instruction145) +PUBLIC FN_PREFIX(CryptonightR_instruction146) +PUBLIC FN_PREFIX(CryptonightR_instruction147) +PUBLIC FN_PREFIX(CryptonightR_instruction148) +PUBLIC FN_PREFIX(CryptonightR_instruction149) +PUBLIC FN_PREFIX(CryptonightR_instruction150) +PUBLIC FN_PREFIX(CryptonightR_instruction151) +PUBLIC FN_PREFIX(CryptonightR_instruction152) +PUBLIC FN_PREFIX(CryptonightR_instruction153) +PUBLIC FN_PREFIX(CryptonightR_instruction154) +PUBLIC FN_PREFIX(CryptonightR_instruction155) +PUBLIC FN_PREFIX(CryptonightR_instruction156) +PUBLIC FN_PREFIX(CryptonightR_instruction157) +PUBLIC FN_PREFIX(CryptonightR_instruction158) +PUBLIC FN_PREFIX(CryptonightR_instruction159) +PUBLIC FN_PREFIX(CryptonightR_instruction160) +PUBLIC FN_PREFIX(CryptonightR_instruction161) +PUBLIC FN_PREFIX(CryptonightR_instruction162) +PUBLIC FN_PREFIX(CryptonightR_instruction163) +PUBLIC FN_PREFIX(CryptonightR_instruction164) +PUBLIC FN_PREFIX(CryptonightR_instruction165) +PUBLIC FN_PREFIX(CryptonightR_instruction166) +PUBLIC FN_PREFIX(CryptonightR_instruction167) +PUBLIC FN_PREFIX(CryptonightR_instruction168) +PUBLIC FN_PREFIX(CryptonightR_instruction169) +PUBLIC FN_PREFIX(CryptonightR_instruction170) +PUBLIC FN_PREFIX(CryptonightR_instruction171) +PUBLIC FN_PREFIX(CryptonightR_instruction172) +PUBLIC FN_PREFIX(CryptonightR_instruction173) +PUBLIC FN_PREFIX(CryptonightR_instruction174) +PUBLIC FN_PREFIX(CryptonightR_instruction175) +PUBLIC FN_PREFIX(CryptonightR_instruction176) +PUBLIC FN_PREFIX(CryptonightR_instruction177) +PUBLIC FN_PREFIX(CryptonightR_instruction178) +PUBLIC FN_PREFIX(CryptonightR_instruction179) +PUBLIC FN_PREFIX(CryptonightR_instruction180) +PUBLIC FN_PREFIX(CryptonightR_instruction181) +PUBLIC FN_PREFIX(CryptonightR_instruction182) +PUBLIC FN_PREFIX(CryptonightR_instruction183) +PUBLIC FN_PREFIX(CryptonightR_instruction184) +PUBLIC FN_PREFIX(CryptonightR_instruction185) +PUBLIC FN_PREFIX(CryptonightR_instruction186) +PUBLIC FN_PREFIX(CryptonightR_instruction187) +PUBLIC FN_PREFIX(CryptonightR_instruction188) +PUBLIC FN_PREFIX(CryptonightR_instruction189) +PUBLIC FN_PREFIX(CryptonightR_instruction190) +PUBLIC FN_PREFIX(CryptonightR_instruction191) +PUBLIC FN_PREFIX(CryptonightR_instruction192) +PUBLIC FN_PREFIX(CryptonightR_instruction193) +PUBLIC FN_PREFIX(CryptonightR_instruction194) +PUBLIC FN_PREFIX(CryptonightR_instruction195) +PUBLIC FN_PREFIX(CryptonightR_instruction196) +PUBLIC FN_PREFIX(CryptonightR_instruction197) +PUBLIC FN_PREFIX(CryptonightR_instruction198) +PUBLIC FN_PREFIX(CryptonightR_instruction199) +PUBLIC FN_PREFIX(CryptonightR_instruction200) +PUBLIC FN_PREFIX(CryptonightR_instruction201) +PUBLIC FN_PREFIX(CryptonightR_instruction202) +PUBLIC FN_PREFIX(CryptonightR_instruction203) +PUBLIC FN_PREFIX(CryptonightR_instruction204) +PUBLIC FN_PREFIX(CryptonightR_instruction205) +PUBLIC FN_PREFIX(CryptonightR_instruction206) +PUBLIC FN_PREFIX(CryptonightR_instruction207) +PUBLIC FN_PREFIX(CryptonightR_instruction208) +PUBLIC FN_PREFIX(CryptonightR_instruction209) +PUBLIC FN_PREFIX(CryptonightR_instruction210) +PUBLIC FN_PREFIX(CryptonightR_instruction211) +PUBLIC FN_PREFIX(CryptonightR_instruction212) +PUBLIC FN_PREFIX(CryptonightR_instruction213) +PUBLIC FN_PREFIX(CryptonightR_instruction214) +PUBLIC FN_PREFIX(CryptonightR_instruction215) +PUBLIC FN_PREFIX(CryptonightR_instruction216) +PUBLIC FN_PREFIX(CryptonightR_instruction217) +PUBLIC FN_PREFIX(CryptonightR_instruction218) +PUBLIC FN_PREFIX(CryptonightR_instruction219) +PUBLIC FN_PREFIX(CryptonightR_instruction220) +PUBLIC FN_PREFIX(CryptonightR_instruction221) +PUBLIC FN_PREFIX(CryptonightR_instruction222) +PUBLIC FN_PREFIX(CryptonightR_instruction223) +PUBLIC FN_PREFIX(CryptonightR_instruction224) +PUBLIC FN_PREFIX(CryptonightR_instruction225) +PUBLIC FN_PREFIX(CryptonightR_instruction226) +PUBLIC FN_PREFIX(CryptonightR_instruction227) +PUBLIC FN_PREFIX(CryptonightR_instruction228) +PUBLIC FN_PREFIX(CryptonightR_instruction229) +PUBLIC FN_PREFIX(CryptonightR_instruction230) +PUBLIC FN_PREFIX(CryptonightR_instruction231) +PUBLIC FN_PREFIX(CryptonightR_instruction232) +PUBLIC FN_PREFIX(CryptonightR_instruction233) +PUBLIC FN_PREFIX(CryptonightR_instruction234) +PUBLIC FN_PREFIX(CryptonightR_instruction235) +PUBLIC FN_PREFIX(CryptonightR_instruction236) +PUBLIC FN_PREFIX(CryptonightR_instruction237) +PUBLIC FN_PREFIX(CryptonightR_instruction238) +PUBLIC FN_PREFIX(CryptonightR_instruction239) +PUBLIC FN_PREFIX(CryptonightR_instruction240) +PUBLIC FN_PREFIX(CryptonightR_instruction241) +PUBLIC FN_PREFIX(CryptonightR_instruction242) +PUBLIC FN_PREFIX(CryptonightR_instruction243) +PUBLIC FN_PREFIX(CryptonightR_instruction244) +PUBLIC FN_PREFIX(CryptonightR_instruction245) +PUBLIC FN_PREFIX(CryptonightR_instruction246) +PUBLIC FN_PREFIX(CryptonightR_instruction247) +PUBLIC FN_PREFIX(CryptonightR_instruction248) +PUBLIC FN_PREFIX(CryptonightR_instruction249) +PUBLIC FN_PREFIX(CryptonightR_instruction250) +PUBLIC FN_PREFIX(CryptonightR_instruction251) +PUBLIC FN_PREFIX(CryptonightR_instruction252) +PUBLIC FN_PREFIX(CryptonightR_instruction253) +PUBLIC FN_PREFIX(CryptonightR_instruction254) +PUBLIC FN_PREFIX(CryptonightR_instruction255) +PUBLIC FN_PREFIX(CryptonightR_instruction256) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov0) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov1) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov2) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov3) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov4) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov5) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov6) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov7) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov8) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov9) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov10) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov11) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov12) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov13) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov14) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov15) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov16) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov17) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov18) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov19) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov20) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov21) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov22) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov23) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov24) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov25) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov26) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov27) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov28) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov29) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov30) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov31) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov32) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov33) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov34) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov35) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov36) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov37) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov38) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov39) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov40) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov41) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov42) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov43) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov44) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov45) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov46) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov47) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov48) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov49) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov50) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov51) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov52) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov53) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov54) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov55) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov56) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov57) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov58) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov59) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov60) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov61) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov62) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov63) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov64) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov65) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov66) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov67) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov68) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov69) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov70) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov71) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov72) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov73) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov74) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov75) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov76) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov77) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov78) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov79) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov80) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov81) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov82) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov83) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov84) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov85) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov86) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov87) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov88) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov89) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov90) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov91) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov92) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov93) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov94) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov95) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov96) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov97) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov98) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov99) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov100) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov101) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov102) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov103) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov104) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov105) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov106) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov107) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov108) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov109) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov110) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov111) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov112) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov113) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov114) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov115) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov116) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov117) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov118) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov119) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov120) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov121) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov122) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov123) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov124) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov125) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov126) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov127) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov128) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov129) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov130) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov131) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov132) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov133) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov134) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov135) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov136) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov137) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov138) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov139) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov140) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov141) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov142) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov143) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov144) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov145) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov146) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov147) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov148) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov149) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov150) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov151) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov152) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov153) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov154) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov155) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov156) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov157) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov158) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov159) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov160) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov161) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov162) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov163) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov164) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov165) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov166) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov167) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov168) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov169) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov170) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov171) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov172) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov173) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov174) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov175) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov176) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov177) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov178) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov179) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov180) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov181) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov182) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov183) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov184) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov185) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov186) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov187) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov188) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov189) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov190) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov191) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov192) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov193) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov194) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov195) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov196) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov197) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov198) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov199) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov200) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov201) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov202) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov203) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov204) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov205) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov206) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov207) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov208) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov209) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov210) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov211) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov212) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov213) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov214) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov215) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov216) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov217) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov218) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov219) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov220) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov221) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov222) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov223) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov224) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov225) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov226) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov227) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov228) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov229) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov230) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov231) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov232) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov233) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov234) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov235) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov236) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov237) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov238) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov239) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov240) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov241) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov242) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov243) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov244) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov245) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov246) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov247) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov248) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov249) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov250) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov251) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov252) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov253) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov254) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov255) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov256) + +#include "CryptonightWOW_template.inc" +#include "CryptonightR_template.inc" + +FN_PREFIX(CryptonightR_instruction0): + imul rbx, rbx +FN_PREFIX(CryptonightR_instruction1): + imul rbx, rbx +FN_PREFIX(CryptonightR_instruction2): + imul rbx, rbx +FN_PREFIX(CryptonightR_instruction3): + add rbx, r9 + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction4): + sub rbx, r9 +FN_PREFIX(CryptonightR_instruction5): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction6): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction7): + xor rbx, r9 +FN_PREFIX(CryptonightR_instruction8): + imul rsi, rbx +FN_PREFIX(CryptonightR_instruction9): + imul rsi, rbx +FN_PREFIX(CryptonightR_instruction10): + imul rsi, rbx +FN_PREFIX(CryptonightR_instruction11): + add rsi, rbx + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction12): + sub rsi, rbx +FN_PREFIX(CryptonightR_instruction13): + ror esi, cl +FN_PREFIX(CryptonightR_instruction14): + rol esi, cl +FN_PREFIX(CryptonightR_instruction15): + xor rsi, rbx +FN_PREFIX(CryptonightR_instruction16): + imul rdi, rbx +FN_PREFIX(CryptonightR_instruction17): + imul rdi, rbx +FN_PREFIX(CryptonightR_instruction18): + imul rdi, rbx +FN_PREFIX(CryptonightR_instruction19): + add rdi, rbx + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction20): + sub rdi, rbx +FN_PREFIX(CryptonightR_instruction21): + ror edi, cl +FN_PREFIX(CryptonightR_instruction22): + rol edi, cl +FN_PREFIX(CryptonightR_instruction23): + xor rdi, rbx +FN_PREFIX(CryptonightR_instruction24): + imul rbp, rbx +FN_PREFIX(CryptonightR_instruction25): + imul rbp, rbx +FN_PREFIX(CryptonightR_instruction26): + imul rbp, rbx +FN_PREFIX(CryptonightR_instruction27): + add rbp, rbx + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction28): + sub rbp, rbx +FN_PREFIX(CryptonightR_instruction29): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction30): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction31): + xor rbp, rbx +FN_PREFIX(CryptonightR_instruction32): + imul rbx, rsi +FN_PREFIX(CryptonightR_instruction33): + imul rbx, rsi +FN_PREFIX(CryptonightR_instruction34): + imul rbx, rsi +FN_PREFIX(CryptonightR_instruction35): + add rbx, rsi + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction36): + sub rbx, rsi +FN_PREFIX(CryptonightR_instruction37): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction38): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction39): + xor rbx, rsi +FN_PREFIX(CryptonightR_instruction40): + imul rsi, rsi +FN_PREFIX(CryptonightR_instruction41): + imul rsi, rsi +FN_PREFIX(CryptonightR_instruction42): + imul rsi, rsi +FN_PREFIX(CryptonightR_instruction43): + add rsi, r9 + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction44): + sub rsi, r9 +FN_PREFIX(CryptonightR_instruction45): + ror esi, cl +FN_PREFIX(CryptonightR_instruction46): + rol esi, cl +FN_PREFIX(CryptonightR_instruction47): + xor rsi, r9 +FN_PREFIX(CryptonightR_instruction48): + imul rdi, rsi +FN_PREFIX(CryptonightR_instruction49): + imul rdi, rsi +FN_PREFIX(CryptonightR_instruction50): + imul rdi, rsi +FN_PREFIX(CryptonightR_instruction51): + add rdi, rsi + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction52): + sub rdi, rsi +FN_PREFIX(CryptonightR_instruction53): + ror edi, cl +FN_PREFIX(CryptonightR_instruction54): + rol edi, cl +FN_PREFIX(CryptonightR_instruction55): + xor rdi, rsi +FN_PREFIX(CryptonightR_instruction56): + imul rbp, rsi +FN_PREFIX(CryptonightR_instruction57): + imul rbp, rsi +FN_PREFIX(CryptonightR_instruction58): + imul rbp, rsi +FN_PREFIX(CryptonightR_instruction59): + add rbp, rsi + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction60): + sub rbp, rsi +FN_PREFIX(CryptonightR_instruction61): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction62): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction63): + xor rbp, rsi +FN_PREFIX(CryptonightR_instruction64): + imul rbx, rdi +FN_PREFIX(CryptonightR_instruction65): + imul rbx, rdi +FN_PREFIX(CryptonightR_instruction66): + imul rbx, rdi +FN_PREFIX(CryptonightR_instruction67): + add rbx, rdi + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction68): + sub rbx, rdi +FN_PREFIX(CryptonightR_instruction69): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction70): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction71): + xor rbx, rdi +FN_PREFIX(CryptonightR_instruction72): + imul rsi, rdi +FN_PREFIX(CryptonightR_instruction73): + imul rsi, rdi +FN_PREFIX(CryptonightR_instruction74): + imul rsi, rdi +FN_PREFIX(CryptonightR_instruction75): + add rsi, rdi + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction76): + sub rsi, rdi +FN_PREFIX(CryptonightR_instruction77): + ror esi, cl +FN_PREFIX(CryptonightR_instruction78): + rol esi, cl +FN_PREFIX(CryptonightR_instruction79): + xor rsi, rdi +FN_PREFIX(CryptonightR_instruction80): + imul rdi, rdi +FN_PREFIX(CryptonightR_instruction81): + imul rdi, rdi +FN_PREFIX(CryptonightR_instruction82): + imul rdi, rdi +FN_PREFIX(CryptonightR_instruction83): + add rdi, r9 + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction84): + sub rdi, r9 +FN_PREFIX(CryptonightR_instruction85): + ror edi, cl +FN_PREFIX(CryptonightR_instruction86): + rol edi, cl +FN_PREFIX(CryptonightR_instruction87): + xor rdi, r9 +FN_PREFIX(CryptonightR_instruction88): + imul rbp, rdi +FN_PREFIX(CryptonightR_instruction89): + imul rbp, rdi +FN_PREFIX(CryptonightR_instruction90): + imul rbp, rdi +FN_PREFIX(CryptonightR_instruction91): + add rbp, rdi + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction92): + sub rbp, rdi +FN_PREFIX(CryptonightR_instruction93): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction94): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction95): + xor rbp, rdi +FN_PREFIX(CryptonightR_instruction96): + imul rbx, rbp +FN_PREFIX(CryptonightR_instruction97): + imul rbx, rbp +FN_PREFIX(CryptonightR_instruction98): + imul rbx, rbp +FN_PREFIX(CryptonightR_instruction99): + add rbx, rbp + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction100): + sub rbx, rbp +FN_PREFIX(CryptonightR_instruction101): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction102): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction103): + xor rbx, rbp +FN_PREFIX(CryptonightR_instruction104): + imul rsi, rbp +FN_PREFIX(CryptonightR_instruction105): + imul rsi, rbp +FN_PREFIX(CryptonightR_instruction106): + imul rsi, rbp +FN_PREFIX(CryptonightR_instruction107): + add rsi, rbp + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction108): + sub rsi, rbp +FN_PREFIX(CryptonightR_instruction109): + ror esi, cl +FN_PREFIX(CryptonightR_instruction110): + rol esi, cl +FN_PREFIX(CryptonightR_instruction111): + xor rsi, rbp +FN_PREFIX(CryptonightR_instruction112): + imul rdi, rbp +FN_PREFIX(CryptonightR_instruction113): + imul rdi, rbp +FN_PREFIX(CryptonightR_instruction114): + imul rdi, rbp +FN_PREFIX(CryptonightR_instruction115): + add rdi, rbp + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction116): + sub rdi, rbp +FN_PREFIX(CryptonightR_instruction117): + ror edi, cl +FN_PREFIX(CryptonightR_instruction118): + rol edi, cl +FN_PREFIX(CryptonightR_instruction119): + xor rdi, rbp +FN_PREFIX(CryptonightR_instruction120): + imul rbp, rbp +FN_PREFIX(CryptonightR_instruction121): + imul rbp, rbp +FN_PREFIX(CryptonightR_instruction122): + imul rbp, rbp +FN_PREFIX(CryptonightR_instruction123): + add rbp, r9 + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction124): + sub rbp, r9 +FN_PREFIX(CryptonightR_instruction125): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction126): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction127): + xor rbp, r9 +FN_PREFIX(CryptonightR_instruction128): + imul rbx, rsp +FN_PREFIX(CryptonightR_instruction129): + imul rbx, rsp +FN_PREFIX(CryptonightR_instruction130): + imul rbx, rsp +FN_PREFIX(CryptonightR_instruction131): + add rbx, rsp + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction132): + sub rbx, rsp +FN_PREFIX(CryptonightR_instruction133): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction134): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction135): + xor rbx, rsp +FN_PREFIX(CryptonightR_instruction136): + imul rsi, rsp +FN_PREFIX(CryptonightR_instruction137): + imul rsi, rsp +FN_PREFIX(CryptonightR_instruction138): + imul rsi, rsp +FN_PREFIX(CryptonightR_instruction139): + add rsi, rsp + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction140): + sub rsi, rsp +FN_PREFIX(CryptonightR_instruction141): + ror esi, cl +FN_PREFIX(CryptonightR_instruction142): + rol esi, cl +FN_PREFIX(CryptonightR_instruction143): + xor rsi, rsp +FN_PREFIX(CryptonightR_instruction144): + imul rdi, rsp +FN_PREFIX(CryptonightR_instruction145): + imul rdi, rsp +FN_PREFIX(CryptonightR_instruction146): + imul rdi, rsp +FN_PREFIX(CryptonightR_instruction147): + add rdi, rsp + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction148): + sub rdi, rsp +FN_PREFIX(CryptonightR_instruction149): + ror edi, cl +FN_PREFIX(CryptonightR_instruction150): + rol edi, cl +FN_PREFIX(CryptonightR_instruction151): + xor rdi, rsp +FN_PREFIX(CryptonightR_instruction152): + imul rbp, rsp +FN_PREFIX(CryptonightR_instruction153): + imul rbp, rsp +FN_PREFIX(CryptonightR_instruction154): + imul rbp, rsp +FN_PREFIX(CryptonightR_instruction155): + add rbp, rsp + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction156): + sub rbp, rsp +FN_PREFIX(CryptonightR_instruction157): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction158): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction159): + xor rbp, rsp +FN_PREFIX(CryptonightR_instruction160): + imul rbx, r15 +FN_PREFIX(CryptonightR_instruction161): + imul rbx, r15 +FN_PREFIX(CryptonightR_instruction162): + imul rbx, r15 +FN_PREFIX(CryptonightR_instruction163): + add rbx, r15 + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction164): + sub rbx, r15 +FN_PREFIX(CryptonightR_instruction165): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction166): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction167): + xor rbx, r15 +FN_PREFIX(CryptonightR_instruction168): + imul rsi, r15 +FN_PREFIX(CryptonightR_instruction169): + imul rsi, r15 +FN_PREFIX(CryptonightR_instruction170): + imul rsi, r15 +FN_PREFIX(CryptonightR_instruction171): + add rsi, r15 + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction172): + sub rsi, r15 +FN_PREFIX(CryptonightR_instruction173): + ror esi, cl +FN_PREFIX(CryptonightR_instruction174): + rol esi, cl +FN_PREFIX(CryptonightR_instruction175): + xor rsi, r15 +FN_PREFIX(CryptonightR_instruction176): + imul rdi, r15 +FN_PREFIX(CryptonightR_instruction177): + imul rdi, r15 +FN_PREFIX(CryptonightR_instruction178): + imul rdi, r15 +FN_PREFIX(CryptonightR_instruction179): + add rdi, r15 + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction180): + sub rdi, r15 +FN_PREFIX(CryptonightR_instruction181): + ror edi, cl +FN_PREFIX(CryptonightR_instruction182): + rol edi, cl +FN_PREFIX(CryptonightR_instruction183): + xor rdi, r15 +FN_PREFIX(CryptonightR_instruction184): + imul rbp, r15 +FN_PREFIX(CryptonightR_instruction185): + imul rbp, r15 +FN_PREFIX(CryptonightR_instruction186): + imul rbp, r15 +FN_PREFIX(CryptonightR_instruction187): + add rbp, r15 + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction188): + sub rbp, r15 +FN_PREFIX(CryptonightR_instruction189): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction190): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction191): + xor rbp, r15 +FN_PREFIX(CryptonightR_instruction192): + imul rbx, rax +FN_PREFIX(CryptonightR_instruction193): + imul rbx, rax +FN_PREFIX(CryptonightR_instruction194): + imul rbx, rax +FN_PREFIX(CryptonightR_instruction195): + add rbx, rax + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction196): + sub rbx, rax +FN_PREFIX(CryptonightR_instruction197): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction198): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction199): + xor rbx, rax +FN_PREFIX(CryptonightR_instruction200): + imul rsi, rax +FN_PREFIX(CryptonightR_instruction201): + imul rsi, rax +FN_PREFIX(CryptonightR_instruction202): + imul rsi, rax +FN_PREFIX(CryptonightR_instruction203): + add rsi, rax + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction204): + sub rsi, rax +FN_PREFIX(CryptonightR_instruction205): + ror esi, cl +FN_PREFIX(CryptonightR_instruction206): + rol esi, cl +FN_PREFIX(CryptonightR_instruction207): + xor rsi, rax +FN_PREFIX(CryptonightR_instruction208): + imul rdi, rax +FN_PREFIX(CryptonightR_instruction209): + imul rdi, rax +FN_PREFIX(CryptonightR_instruction210): + imul rdi, rax +FN_PREFIX(CryptonightR_instruction211): + add rdi, rax + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction212): + sub rdi, rax +FN_PREFIX(CryptonightR_instruction213): + ror edi, cl +FN_PREFIX(CryptonightR_instruction214): + rol edi, cl +FN_PREFIX(CryptonightR_instruction215): + xor rdi, rax +FN_PREFIX(CryptonightR_instruction216): + imul rbp, rax +FN_PREFIX(CryptonightR_instruction217): + imul rbp, rax +FN_PREFIX(CryptonightR_instruction218): + imul rbp, rax +FN_PREFIX(CryptonightR_instruction219): + add rbp, rax + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction220): + sub rbp, rax +FN_PREFIX(CryptonightR_instruction221): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction222): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction223): + xor rbp, rax +FN_PREFIX(CryptonightR_instruction224): + imul rbx, rdx +FN_PREFIX(CryptonightR_instruction225): + imul rbx, rdx +FN_PREFIX(CryptonightR_instruction226): + imul rbx, rdx +FN_PREFIX(CryptonightR_instruction227): + add rbx, rdx + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction228): + sub rbx, rdx +FN_PREFIX(CryptonightR_instruction229): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction230): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction231): + xor rbx, rdx +FN_PREFIX(CryptonightR_instruction232): + imul rsi, rdx +FN_PREFIX(CryptonightR_instruction233): + imul rsi, rdx +FN_PREFIX(CryptonightR_instruction234): + imul rsi, rdx +FN_PREFIX(CryptonightR_instruction235): + add rsi, rdx + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction236): + sub rsi, rdx +FN_PREFIX(CryptonightR_instruction237): + ror esi, cl +FN_PREFIX(CryptonightR_instruction238): + rol esi, cl +FN_PREFIX(CryptonightR_instruction239): + xor rsi, rdx +FN_PREFIX(CryptonightR_instruction240): + imul rdi, rdx +FN_PREFIX(CryptonightR_instruction241): + imul rdi, rdx +FN_PREFIX(CryptonightR_instruction242): + imul rdi, rdx +FN_PREFIX(CryptonightR_instruction243): + add rdi, rdx + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction244): + sub rdi, rdx +FN_PREFIX(CryptonightR_instruction245): + ror edi, cl +FN_PREFIX(CryptonightR_instruction246): + rol edi, cl +FN_PREFIX(CryptonightR_instruction247): + xor rdi, rdx +FN_PREFIX(CryptonightR_instruction248): + imul rbp, rdx +FN_PREFIX(CryptonightR_instruction249): + imul rbp, rdx +FN_PREFIX(CryptonightR_instruction250): + imul rbp, rdx +FN_PREFIX(CryptonightR_instruction251): + add rbp, rdx + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction252): + sub rbp, rdx +FN_PREFIX(CryptonightR_instruction253): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction254): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction255): + xor rbp, rdx +FN_PREFIX(CryptonightR_instruction256): + imul rbx, rbx +FN_PREFIX(CryptonightR_instruction_mov0): + +FN_PREFIX(CryptonightR_instruction_mov1): + +FN_PREFIX(CryptonightR_instruction_mov2): + +FN_PREFIX(CryptonightR_instruction_mov3): + +FN_PREFIX(CryptonightR_instruction_mov4): + +FN_PREFIX(CryptonightR_instruction_mov5): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov6): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov7): + +FN_PREFIX(CryptonightR_instruction_mov8): + +FN_PREFIX(CryptonightR_instruction_mov9): + +FN_PREFIX(CryptonightR_instruction_mov10): + +FN_PREFIX(CryptonightR_instruction_mov11): + +FN_PREFIX(CryptonightR_instruction_mov12): + +FN_PREFIX(CryptonightR_instruction_mov13): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov14): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov15): + +FN_PREFIX(CryptonightR_instruction_mov16): + +FN_PREFIX(CryptonightR_instruction_mov17): + +FN_PREFIX(CryptonightR_instruction_mov18): + +FN_PREFIX(CryptonightR_instruction_mov19): + +FN_PREFIX(CryptonightR_instruction_mov20): + +FN_PREFIX(CryptonightR_instruction_mov21): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov22): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov23): + +FN_PREFIX(CryptonightR_instruction_mov24): + +FN_PREFIX(CryptonightR_instruction_mov25): + +FN_PREFIX(CryptonightR_instruction_mov26): + +FN_PREFIX(CryptonightR_instruction_mov27): + +FN_PREFIX(CryptonightR_instruction_mov28): + +FN_PREFIX(CryptonightR_instruction_mov29): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov30): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov31): + +FN_PREFIX(CryptonightR_instruction_mov32): + +FN_PREFIX(CryptonightR_instruction_mov33): + +FN_PREFIX(CryptonightR_instruction_mov34): + +FN_PREFIX(CryptonightR_instruction_mov35): + +FN_PREFIX(CryptonightR_instruction_mov36): + +FN_PREFIX(CryptonightR_instruction_mov37): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov38): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov39): + +FN_PREFIX(CryptonightR_instruction_mov40): + +FN_PREFIX(CryptonightR_instruction_mov41): + +FN_PREFIX(CryptonightR_instruction_mov42): + +FN_PREFIX(CryptonightR_instruction_mov43): + +FN_PREFIX(CryptonightR_instruction_mov44): + +FN_PREFIX(CryptonightR_instruction_mov45): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov46): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov47): + +FN_PREFIX(CryptonightR_instruction_mov48): + +FN_PREFIX(CryptonightR_instruction_mov49): + +FN_PREFIX(CryptonightR_instruction_mov50): + +FN_PREFIX(CryptonightR_instruction_mov51): + +FN_PREFIX(CryptonightR_instruction_mov52): + +FN_PREFIX(CryptonightR_instruction_mov53): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov54): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov55): + +FN_PREFIX(CryptonightR_instruction_mov56): + +FN_PREFIX(CryptonightR_instruction_mov57): + +FN_PREFIX(CryptonightR_instruction_mov58): + +FN_PREFIX(CryptonightR_instruction_mov59): + +FN_PREFIX(CryptonightR_instruction_mov60): + +FN_PREFIX(CryptonightR_instruction_mov61): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov62): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov63): + +FN_PREFIX(CryptonightR_instruction_mov64): + +FN_PREFIX(CryptonightR_instruction_mov65): + +FN_PREFIX(CryptonightR_instruction_mov66): + +FN_PREFIX(CryptonightR_instruction_mov67): + +FN_PREFIX(CryptonightR_instruction_mov68): + +FN_PREFIX(CryptonightR_instruction_mov69): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov70): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov71): + +FN_PREFIX(CryptonightR_instruction_mov72): + +FN_PREFIX(CryptonightR_instruction_mov73): + +FN_PREFIX(CryptonightR_instruction_mov74): + +FN_PREFIX(CryptonightR_instruction_mov75): + +FN_PREFIX(CryptonightR_instruction_mov76): + +FN_PREFIX(CryptonightR_instruction_mov77): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov78): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov79): + +FN_PREFIX(CryptonightR_instruction_mov80): + +FN_PREFIX(CryptonightR_instruction_mov81): + +FN_PREFIX(CryptonightR_instruction_mov82): + +FN_PREFIX(CryptonightR_instruction_mov83): + +FN_PREFIX(CryptonightR_instruction_mov84): + +FN_PREFIX(CryptonightR_instruction_mov85): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov86): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov87): + +FN_PREFIX(CryptonightR_instruction_mov88): + +FN_PREFIX(CryptonightR_instruction_mov89): + +FN_PREFIX(CryptonightR_instruction_mov90): + +FN_PREFIX(CryptonightR_instruction_mov91): + +FN_PREFIX(CryptonightR_instruction_mov92): + +FN_PREFIX(CryptonightR_instruction_mov93): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov94): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov95): + +FN_PREFIX(CryptonightR_instruction_mov96): + +FN_PREFIX(CryptonightR_instruction_mov97): + +FN_PREFIX(CryptonightR_instruction_mov98): + +FN_PREFIX(CryptonightR_instruction_mov99): + +FN_PREFIX(CryptonightR_instruction_mov100): + +FN_PREFIX(CryptonightR_instruction_mov101): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov102): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov103): + +FN_PREFIX(CryptonightR_instruction_mov104): + +FN_PREFIX(CryptonightR_instruction_mov105): + +FN_PREFIX(CryptonightR_instruction_mov106): + +FN_PREFIX(CryptonightR_instruction_mov107): + +FN_PREFIX(CryptonightR_instruction_mov108): + +FN_PREFIX(CryptonightR_instruction_mov109): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov110): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov111): + +FN_PREFIX(CryptonightR_instruction_mov112): + +FN_PREFIX(CryptonightR_instruction_mov113): + +FN_PREFIX(CryptonightR_instruction_mov114): + +FN_PREFIX(CryptonightR_instruction_mov115): + +FN_PREFIX(CryptonightR_instruction_mov116): + +FN_PREFIX(CryptonightR_instruction_mov117): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov118): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov119): + +FN_PREFIX(CryptonightR_instruction_mov120): + +FN_PREFIX(CryptonightR_instruction_mov121): + +FN_PREFIX(CryptonightR_instruction_mov122): + +FN_PREFIX(CryptonightR_instruction_mov123): + +FN_PREFIX(CryptonightR_instruction_mov124): + +FN_PREFIX(CryptonightR_instruction_mov125): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov126): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov127): + +FN_PREFIX(CryptonightR_instruction_mov128): + +FN_PREFIX(CryptonightR_instruction_mov129): + +FN_PREFIX(CryptonightR_instruction_mov130): + +FN_PREFIX(CryptonightR_instruction_mov131): + +FN_PREFIX(CryptonightR_instruction_mov132): + +FN_PREFIX(CryptonightR_instruction_mov133): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov134): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov135): + +FN_PREFIX(CryptonightR_instruction_mov136): + +FN_PREFIX(CryptonightR_instruction_mov137): + +FN_PREFIX(CryptonightR_instruction_mov138): + +FN_PREFIX(CryptonightR_instruction_mov139): + +FN_PREFIX(CryptonightR_instruction_mov140): + +FN_PREFIX(CryptonightR_instruction_mov141): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov142): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov143): + +FN_PREFIX(CryptonightR_instruction_mov144): + +FN_PREFIX(CryptonightR_instruction_mov145): + +FN_PREFIX(CryptonightR_instruction_mov146): + +FN_PREFIX(CryptonightR_instruction_mov147): + +FN_PREFIX(CryptonightR_instruction_mov148): + +FN_PREFIX(CryptonightR_instruction_mov149): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov150): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov151): + +FN_PREFIX(CryptonightR_instruction_mov152): + +FN_PREFIX(CryptonightR_instruction_mov153): + +FN_PREFIX(CryptonightR_instruction_mov154): + +FN_PREFIX(CryptonightR_instruction_mov155): + +FN_PREFIX(CryptonightR_instruction_mov156): + +FN_PREFIX(CryptonightR_instruction_mov157): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov158): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov159): + +FN_PREFIX(CryptonightR_instruction_mov160): + +FN_PREFIX(CryptonightR_instruction_mov161): + +FN_PREFIX(CryptonightR_instruction_mov162): + +FN_PREFIX(CryptonightR_instruction_mov163): + +FN_PREFIX(CryptonightR_instruction_mov164): + +FN_PREFIX(CryptonightR_instruction_mov165): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov166): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov167): + +FN_PREFIX(CryptonightR_instruction_mov168): + +FN_PREFIX(CryptonightR_instruction_mov169): + +FN_PREFIX(CryptonightR_instruction_mov170): + +FN_PREFIX(CryptonightR_instruction_mov171): + +FN_PREFIX(CryptonightR_instruction_mov172): + +FN_PREFIX(CryptonightR_instruction_mov173): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov174): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov175): + +FN_PREFIX(CryptonightR_instruction_mov176): + +FN_PREFIX(CryptonightR_instruction_mov177): + +FN_PREFIX(CryptonightR_instruction_mov178): + +FN_PREFIX(CryptonightR_instruction_mov179): + +FN_PREFIX(CryptonightR_instruction_mov180): + +FN_PREFIX(CryptonightR_instruction_mov181): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov182): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov183): + +FN_PREFIX(CryptonightR_instruction_mov184): + +FN_PREFIX(CryptonightR_instruction_mov185): + +FN_PREFIX(CryptonightR_instruction_mov186): + +FN_PREFIX(CryptonightR_instruction_mov187): + +FN_PREFIX(CryptonightR_instruction_mov188): + +FN_PREFIX(CryptonightR_instruction_mov189): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov190): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov191): + +FN_PREFIX(CryptonightR_instruction_mov192): + +FN_PREFIX(CryptonightR_instruction_mov193): + +FN_PREFIX(CryptonightR_instruction_mov194): + +FN_PREFIX(CryptonightR_instruction_mov195): + +FN_PREFIX(CryptonightR_instruction_mov196): + +FN_PREFIX(CryptonightR_instruction_mov197): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov198): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov199): + +FN_PREFIX(CryptonightR_instruction_mov200): + +FN_PREFIX(CryptonightR_instruction_mov201): + +FN_PREFIX(CryptonightR_instruction_mov202): + +FN_PREFIX(CryptonightR_instruction_mov203): + +FN_PREFIX(CryptonightR_instruction_mov204): + +FN_PREFIX(CryptonightR_instruction_mov205): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov206): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov207): + +FN_PREFIX(CryptonightR_instruction_mov208): + +FN_PREFIX(CryptonightR_instruction_mov209): + +FN_PREFIX(CryptonightR_instruction_mov210): + +FN_PREFIX(CryptonightR_instruction_mov211): + +FN_PREFIX(CryptonightR_instruction_mov212): + +FN_PREFIX(CryptonightR_instruction_mov213): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov214): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov215): + +FN_PREFIX(CryptonightR_instruction_mov216): + +FN_PREFIX(CryptonightR_instruction_mov217): + +FN_PREFIX(CryptonightR_instruction_mov218): + +FN_PREFIX(CryptonightR_instruction_mov219): + +FN_PREFIX(CryptonightR_instruction_mov220): + +FN_PREFIX(CryptonightR_instruction_mov221): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov222): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov223): + +FN_PREFIX(CryptonightR_instruction_mov224): + +FN_PREFIX(CryptonightR_instruction_mov225): + +FN_PREFIX(CryptonightR_instruction_mov226): + +FN_PREFIX(CryptonightR_instruction_mov227): + +FN_PREFIX(CryptonightR_instruction_mov228): + +FN_PREFIX(CryptonightR_instruction_mov229): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov230): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov231): + +FN_PREFIX(CryptonightR_instruction_mov232): + +FN_PREFIX(CryptonightR_instruction_mov233): + +FN_PREFIX(CryptonightR_instruction_mov234): + +FN_PREFIX(CryptonightR_instruction_mov235): + +FN_PREFIX(CryptonightR_instruction_mov236): + +FN_PREFIX(CryptonightR_instruction_mov237): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov238): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov239): + +FN_PREFIX(CryptonightR_instruction_mov240): + +FN_PREFIX(CryptonightR_instruction_mov241): + +FN_PREFIX(CryptonightR_instruction_mov242): + +FN_PREFIX(CryptonightR_instruction_mov243): + +FN_PREFIX(CryptonightR_instruction_mov244): + +FN_PREFIX(CryptonightR_instruction_mov245): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov246): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov247): + +FN_PREFIX(CryptonightR_instruction_mov248): + +FN_PREFIX(CryptonightR_instruction_mov249): + +FN_PREFIX(CryptonightR_instruction_mov250): + +FN_PREFIX(CryptonightR_instruction_mov251): + +FN_PREFIX(CryptonightR_instruction_mov252): + +FN_PREFIX(CryptonightR_instruction_mov253): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov254): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov255): + +FN_PREFIX(CryptonightR_instruction_mov256): diff --git a/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.asm b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.asm new file mode 100644 index 000000000..25b72c3c0 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.asm @@ -0,0 +1,1583 @@ +; Auto-generated file, do not edit + +_TEXT_CN_TEMPLATE SEGMENT PAGE READ EXECUTE +PUBLIC CryptonightR_instruction0 +PUBLIC CryptonightR_instruction1 +PUBLIC CryptonightR_instruction2 +PUBLIC CryptonightR_instruction3 +PUBLIC CryptonightR_instruction4 +PUBLIC CryptonightR_instruction5 +PUBLIC CryptonightR_instruction6 +PUBLIC CryptonightR_instruction7 +PUBLIC CryptonightR_instruction8 +PUBLIC CryptonightR_instruction9 +PUBLIC CryptonightR_instruction10 +PUBLIC CryptonightR_instruction11 +PUBLIC CryptonightR_instruction12 +PUBLIC CryptonightR_instruction13 +PUBLIC CryptonightR_instruction14 +PUBLIC CryptonightR_instruction15 +PUBLIC CryptonightR_instruction16 +PUBLIC CryptonightR_instruction17 +PUBLIC CryptonightR_instruction18 +PUBLIC CryptonightR_instruction19 +PUBLIC CryptonightR_instruction20 +PUBLIC CryptonightR_instruction21 +PUBLIC CryptonightR_instruction22 +PUBLIC CryptonightR_instruction23 +PUBLIC CryptonightR_instruction24 +PUBLIC CryptonightR_instruction25 +PUBLIC CryptonightR_instruction26 +PUBLIC CryptonightR_instruction27 +PUBLIC CryptonightR_instruction28 +PUBLIC CryptonightR_instruction29 +PUBLIC CryptonightR_instruction30 +PUBLIC CryptonightR_instruction31 +PUBLIC CryptonightR_instruction32 +PUBLIC CryptonightR_instruction33 +PUBLIC CryptonightR_instruction34 +PUBLIC CryptonightR_instruction35 +PUBLIC CryptonightR_instruction36 +PUBLIC CryptonightR_instruction37 +PUBLIC CryptonightR_instruction38 +PUBLIC CryptonightR_instruction39 +PUBLIC CryptonightR_instruction40 +PUBLIC CryptonightR_instruction41 +PUBLIC CryptonightR_instruction42 +PUBLIC CryptonightR_instruction43 +PUBLIC CryptonightR_instruction44 +PUBLIC CryptonightR_instruction45 +PUBLIC CryptonightR_instruction46 +PUBLIC CryptonightR_instruction47 +PUBLIC CryptonightR_instruction48 +PUBLIC CryptonightR_instruction49 +PUBLIC CryptonightR_instruction50 +PUBLIC CryptonightR_instruction51 +PUBLIC CryptonightR_instruction52 +PUBLIC CryptonightR_instruction53 +PUBLIC CryptonightR_instruction54 +PUBLIC CryptonightR_instruction55 +PUBLIC CryptonightR_instruction56 +PUBLIC CryptonightR_instruction57 +PUBLIC CryptonightR_instruction58 +PUBLIC CryptonightR_instruction59 +PUBLIC CryptonightR_instruction60 +PUBLIC CryptonightR_instruction61 +PUBLIC CryptonightR_instruction62 +PUBLIC CryptonightR_instruction63 +PUBLIC CryptonightR_instruction64 +PUBLIC CryptonightR_instruction65 +PUBLIC CryptonightR_instruction66 +PUBLIC CryptonightR_instruction67 +PUBLIC CryptonightR_instruction68 +PUBLIC CryptonightR_instruction69 +PUBLIC CryptonightR_instruction70 +PUBLIC CryptonightR_instruction71 +PUBLIC CryptonightR_instruction72 +PUBLIC CryptonightR_instruction73 +PUBLIC CryptonightR_instruction74 +PUBLIC CryptonightR_instruction75 +PUBLIC CryptonightR_instruction76 +PUBLIC CryptonightR_instruction77 +PUBLIC CryptonightR_instruction78 +PUBLIC CryptonightR_instruction79 +PUBLIC CryptonightR_instruction80 +PUBLIC CryptonightR_instruction81 +PUBLIC CryptonightR_instruction82 +PUBLIC CryptonightR_instruction83 +PUBLIC CryptonightR_instruction84 +PUBLIC CryptonightR_instruction85 +PUBLIC CryptonightR_instruction86 +PUBLIC CryptonightR_instruction87 +PUBLIC CryptonightR_instruction88 +PUBLIC CryptonightR_instruction89 +PUBLIC CryptonightR_instruction90 +PUBLIC CryptonightR_instruction91 +PUBLIC CryptonightR_instruction92 +PUBLIC CryptonightR_instruction93 +PUBLIC CryptonightR_instruction94 +PUBLIC CryptonightR_instruction95 +PUBLIC CryptonightR_instruction96 +PUBLIC CryptonightR_instruction97 +PUBLIC CryptonightR_instruction98 +PUBLIC CryptonightR_instruction99 +PUBLIC CryptonightR_instruction100 +PUBLIC CryptonightR_instruction101 +PUBLIC CryptonightR_instruction102 +PUBLIC CryptonightR_instruction103 +PUBLIC CryptonightR_instruction104 +PUBLIC CryptonightR_instruction105 +PUBLIC CryptonightR_instruction106 +PUBLIC CryptonightR_instruction107 +PUBLIC CryptonightR_instruction108 +PUBLIC CryptonightR_instruction109 +PUBLIC CryptonightR_instruction110 +PUBLIC CryptonightR_instruction111 +PUBLIC CryptonightR_instruction112 +PUBLIC CryptonightR_instruction113 +PUBLIC CryptonightR_instruction114 +PUBLIC CryptonightR_instruction115 +PUBLIC CryptonightR_instruction116 +PUBLIC CryptonightR_instruction117 +PUBLIC CryptonightR_instruction118 +PUBLIC CryptonightR_instruction119 +PUBLIC CryptonightR_instruction120 +PUBLIC CryptonightR_instruction121 +PUBLIC CryptonightR_instruction122 +PUBLIC CryptonightR_instruction123 +PUBLIC CryptonightR_instruction124 +PUBLIC CryptonightR_instruction125 +PUBLIC CryptonightR_instruction126 +PUBLIC CryptonightR_instruction127 +PUBLIC CryptonightR_instruction128 +PUBLIC CryptonightR_instruction129 +PUBLIC CryptonightR_instruction130 +PUBLIC CryptonightR_instruction131 +PUBLIC CryptonightR_instruction132 +PUBLIC CryptonightR_instruction133 +PUBLIC CryptonightR_instruction134 +PUBLIC CryptonightR_instruction135 +PUBLIC CryptonightR_instruction136 +PUBLIC CryptonightR_instruction137 +PUBLIC CryptonightR_instruction138 +PUBLIC CryptonightR_instruction139 +PUBLIC CryptonightR_instruction140 +PUBLIC CryptonightR_instruction141 +PUBLIC CryptonightR_instruction142 +PUBLIC CryptonightR_instruction143 +PUBLIC CryptonightR_instruction144 +PUBLIC CryptonightR_instruction145 +PUBLIC CryptonightR_instruction146 +PUBLIC CryptonightR_instruction147 +PUBLIC CryptonightR_instruction148 +PUBLIC CryptonightR_instruction149 +PUBLIC CryptonightR_instruction150 +PUBLIC CryptonightR_instruction151 +PUBLIC CryptonightR_instruction152 +PUBLIC CryptonightR_instruction153 +PUBLIC CryptonightR_instruction154 +PUBLIC CryptonightR_instruction155 +PUBLIC CryptonightR_instruction156 +PUBLIC CryptonightR_instruction157 +PUBLIC CryptonightR_instruction158 +PUBLIC CryptonightR_instruction159 +PUBLIC CryptonightR_instruction160 +PUBLIC CryptonightR_instruction161 +PUBLIC CryptonightR_instruction162 +PUBLIC CryptonightR_instruction163 +PUBLIC CryptonightR_instruction164 +PUBLIC CryptonightR_instruction165 +PUBLIC CryptonightR_instruction166 +PUBLIC CryptonightR_instruction167 +PUBLIC CryptonightR_instruction168 +PUBLIC CryptonightR_instruction169 +PUBLIC CryptonightR_instruction170 +PUBLIC CryptonightR_instruction171 +PUBLIC CryptonightR_instruction172 +PUBLIC CryptonightR_instruction173 +PUBLIC CryptonightR_instruction174 +PUBLIC CryptonightR_instruction175 +PUBLIC CryptonightR_instruction176 +PUBLIC CryptonightR_instruction177 +PUBLIC CryptonightR_instruction178 +PUBLIC CryptonightR_instruction179 +PUBLIC CryptonightR_instruction180 +PUBLIC CryptonightR_instruction181 +PUBLIC CryptonightR_instruction182 +PUBLIC CryptonightR_instruction183 +PUBLIC CryptonightR_instruction184 +PUBLIC CryptonightR_instruction185 +PUBLIC CryptonightR_instruction186 +PUBLIC CryptonightR_instruction187 +PUBLIC CryptonightR_instruction188 +PUBLIC CryptonightR_instruction189 +PUBLIC CryptonightR_instruction190 +PUBLIC CryptonightR_instruction191 +PUBLIC CryptonightR_instruction192 +PUBLIC CryptonightR_instruction193 +PUBLIC CryptonightR_instruction194 +PUBLIC CryptonightR_instruction195 +PUBLIC CryptonightR_instruction196 +PUBLIC CryptonightR_instruction197 +PUBLIC CryptonightR_instruction198 +PUBLIC CryptonightR_instruction199 +PUBLIC CryptonightR_instruction200 +PUBLIC CryptonightR_instruction201 +PUBLIC CryptonightR_instruction202 +PUBLIC CryptonightR_instruction203 +PUBLIC CryptonightR_instruction204 +PUBLIC CryptonightR_instruction205 +PUBLIC CryptonightR_instruction206 +PUBLIC CryptonightR_instruction207 +PUBLIC CryptonightR_instruction208 +PUBLIC CryptonightR_instruction209 +PUBLIC CryptonightR_instruction210 +PUBLIC CryptonightR_instruction211 +PUBLIC CryptonightR_instruction212 +PUBLIC CryptonightR_instruction213 +PUBLIC CryptonightR_instruction214 +PUBLIC CryptonightR_instruction215 +PUBLIC CryptonightR_instruction216 +PUBLIC CryptonightR_instruction217 +PUBLIC CryptonightR_instruction218 +PUBLIC CryptonightR_instruction219 +PUBLIC CryptonightR_instruction220 +PUBLIC CryptonightR_instruction221 +PUBLIC CryptonightR_instruction222 +PUBLIC CryptonightR_instruction223 +PUBLIC CryptonightR_instruction224 +PUBLIC CryptonightR_instruction225 +PUBLIC CryptonightR_instruction226 +PUBLIC CryptonightR_instruction227 +PUBLIC CryptonightR_instruction228 +PUBLIC CryptonightR_instruction229 +PUBLIC CryptonightR_instruction230 +PUBLIC CryptonightR_instruction231 +PUBLIC CryptonightR_instruction232 +PUBLIC CryptonightR_instruction233 +PUBLIC CryptonightR_instruction234 +PUBLIC CryptonightR_instruction235 +PUBLIC CryptonightR_instruction236 +PUBLIC CryptonightR_instruction237 +PUBLIC CryptonightR_instruction238 +PUBLIC CryptonightR_instruction239 +PUBLIC CryptonightR_instruction240 +PUBLIC CryptonightR_instruction241 +PUBLIC CryptonightR_instruction242 +PUBLIC CryptonightR_instruction243 +PUBLIC CryptonightR_instruction244 +PUBLIC CryptonightR_instruction245 +PUBLIC CryptonightR_instruction246 +PUBLIC CryptonightR_instruction247 +PUBLIC CryptonightR_instruction248 +PUBLIC CryptonightR_instruction249 +PUBLIC CryptonightR_instruction250 +PUBLIC CryptonightR_instruction251 +PUBLIC CryptonightR_instruction252 +PUBLIC CryptonightR_instruction253 +PUBLIC CryptonightR_instruction254 +PUBLIC CryptonightR_instruction255 +PUBLIC CryptonightR_instruction256 +PUBLIC CryptonightR_instruction_mov0 +PUBLIC CryptonightR_instruction_mov1 +PUBLIC CryptonightR_instruction_mov2 +PUBLIC CryptonightR_instruction_mov3 +PUBLIC CryptonightR_instruction_mov4 +PUBLIC CryptonightR_instruction_mov5 +PUBLIC CryptonightR_instruction_mov6 +PUBLIC CryptonightR_instruction_mov7 +PUBLIC CryptonightR_instruction_mov8 +PUBLIC CryptonightR_instruction_mov9 +PUBLIC CryptonightR_instruction_mov10 +PUBLIC CryptonightR_instruction_mov11 +PUBLIC CryptonightR_instruction_mov12 +PUBLIC CryptonightR_instruction_mov13 +PUBLIC CryptonightR_instruction_mov14 +PUBLIC CryptonightR_instruction_mov15 +PUBLIC CryptonightR_instruction_mov16 +PUBLIC CryptonightR_instruction_mov17 +PUBLIC CryptonightR_instruction_mov18 +PUBLIC CryptonightR_instruction_mov19 +PUBLIC CryptonightR_instruction_mov20 +PUBLIC CryptonightR_instruction_mov21 +PUBLIC CryptonightR_instruction_mov22 +PUBLIC CryptonightR_instruction_mov23 +PUBLIC CryptonightR_instruction_mov24 +PUBLIC CryptonightR_instruction_mov25 +PUBLIC CryptonightR_instruction_mov26 +PUBLIC CryptonightR_instruction_mov27 +PUBLIC CryptonightR_instruction_mov28 +PUBLIC CryptonightR_instruction_mov29 +PUBLIC CryptonightR_instruction_mov30 +PUBLIC CryptonightR_instruction_mov31 +PUBLIC CryptonightR_instruction_mov32 +PUBLIC CryptonightR_instruction_mov33 +PUBLIC CryptonightR_instruction_mov34 +PUBLIC CryptonightR_instruction_mov35 +PUBLIC CryptonightR_instruction_mov36 +PUBLIC CryptonightR_instruction_mov37 +PUBLIC CryptonightR_instruction_mov38 +PUBLIC CryptonightR_instruction_mov39 +PUBLIC CryptonightR_instruction_mov40 +PUBLIC CryptonightR_instruction_mov41 +PUBLIC CryptonightR_instruction_mov42 +PUBLIC CryptonightR_instruction_mov43 +PUBLIC CryptonightR_instruction_mov44 +PUBLIC CryptonightR_instruction_mov45 +PUBLIC CryptonightR_instruction_mov46 +PUBLIC CryptonightR_instruction_mov47 +PUBLIC CryptonightR_instruction_mov48 +PUBLIC CryptonightR_instruction_mov49 +PUBLIC CryptonightR_instruction_mov50 +PUBLIC CryptonightR_instruction_mov51 +PUBLIC CryptonightR_instruction_mov52 +PUBLIC CryptonightR_instruction_mov53 +PUBLIC CryptonightR_instruction_mov54 +PUBLIC CryptonightR_instruction_mov55 +PUBLIC CryptonightR_instruction_mov56 +PUBLIC CryptonightR_instruction_mov57 +PUBLIC CryptonightR_instruction_mov58 +PUBLIC CryptonightR_instruction_mov59 +PUBLIC CryptonightR_instruction_mov60 +PUBLIC CryptonightR_instruction_mov61 +PUBLIC CryptonightR_instruction_mov62 +PUBLIC CryptonightR_instruction_mov63 +PUBLIC CryptonightR_instruction_mov64 +PUBLIC CryptonightR_instruction_mov65 +PUBLIC CryptonightR_instruction_mov66 +PUBLIC CryptonightR_instruction_mov67 +PUBLIC CryptonightR_instruction_mov68 +PUBLIC CryptonightR_instruction_mov69 +PUBLIC CryptonightR_instruction_mov70 +PUBLIC CryptonightR_instruction_mov71 +PUBLIC CryptonightR_instruction_mov72 +PUBLIC CryptonightR_instruction_mov73 +PUBLIC CryptonightR_instruction_mov74 +PUBLIC CryptonightR_instruction_mov75 +PUBLIC CryptonightR_instruction_mov76 +PUBLIC CryptonightR_instruction_mov77 +PUBLIC CryptonightR_instruction_mov78 +PUBLIC CryptonightR_instruction_mov79 +PUBLIC CryptonightR_instruction_mov80 +PUBLIC CryptonightR_instruction_mov81 +PUBLIC CryptonightR_instruction_mov82 +PUBLIC CryptonightR_instruction_mov83 +PUBLIC CryptonightR_instruction_mov84 +PUBLIC CryptonightR_instruction_mov85 +PUBLIC CryptonightR_instruction_mov86 +PUBLIC CryptonightR_instruction_mov87 +PUBLIC CryptonightR_instruction_mov88 +PUBLIC CryptonightR_instruction_mov89 +PUBLIC CryptonightR_instruction_mov90 +PUBLIC CryptonightR_instruction_mov91 +PUBLIC CryptonightR_instruction_mov92 +PUBLIC CryptonightR_instruction_mov93 +PUBLIC CryptonightR_instruction_mov94 +PUBLIC CryptonightR_instruction_mov95 +PUBLIC CryptonightR_instruction_mov96 +PUBLIC CryptonightR_instruction_mov97 +PUBLIC CryptonightR_instruction_mov98 +PUBLIC CryptonightR_instruction_mov99 +PUBLIC CryptonightR_instruction_mov100 +PUBLIC CryptonightR_instruction_mov101 +PUBLIC CryptonightR_instruction_mov102 +PUBLIC CryptonightR_instruction_mov103 +PUBLIC CryptonightR_instruction_mov104 +PUBLIC CryptonightR_instruction_mov105 +PUBLIC CryptonightR_instruction_mov106 +PUBLIC CryptonightR_instruction_mov107 +PUBLIC CryptonightR_instruction_mov108 +PUBLIC CryptonightR_instruction_mov109 +PUBLIC CryptonightR_instruction_mov110 +PUBLIC CryptonightR_instruction_mov111 +PUBLIC CryptonightR_instruction_mov112 +PUBLIC CryptonightR_instruction_mov113 +PUBLIC CryptonightR_instruction_mov114 +PUBLIC CryptonightR_instruction_mov115 +PUBLIC CryptonightR_instruction_mov116 +PUBLIC CryptonightR_instruction_mov117 +PUBLIC CryptonightR_instruction_mov118 +PUBLIC CryptonightR_instruction_mov119 +PUBLIC CryptonightR_instruction_mov120 +PUBLIC CryptonightR_instruction_mov121 +PUBLIC CryptonightR_instruction_mov122 +PUBLIC CryptonightR_instruction_mov123 +PUBLIC CryptonightR_instruction_mov124 +PUBLIC CryptonightR_instruction_mov125 +PUBLIC CryptonightR_instruction_mov126 +PUBLIC CryptonightR_instruction_mov127 +PUBLIC CryptonightR_instruction_mov128 +PUBLIC CryptonightR_instruction_mov129 +PUBLIC CryptonightR_instruction_mov130 +PUBLIC CryptonightR_instruction_mov131 +PUBLIC CryptonightR_instruction_mov132 +PUBLIC CryptonightR_instruction_mov133 +PUBLIC CryptonightR_instruction_mov134 +PUBLIC CryptonightR_instruction_mov135 +PUBLIC CryptonightR_instruction_mov136 +PUBLIC CryptonightR_instruction_mov137 +PUBLIC CryptonightR_instruction_mov138 +PUBLIC CryptonightR_instruction_mov139 +PUBLIC CryptonightR_instruction_mov140 +PUBLIC CryptonightR_instruction_mov141 +PUBLIC CryptonightR_instruction_mov142 +PUBLIC CryptonightR_instruction_mov143 +PUBLIC CryptonightR_instruction_mov144 +PUBLIC CryptonightR_instruction_mov145 +PUBLIC CryptonightR_instruction_mov146 +PUBLIC CryptonightR_instruction_mov147 +PUBLIC CryptonightR_instruction_mov148 +PUBLIC CryptonightR_instruction_mov149 +PUBLIC CryptonightR_instruction_mov150 +PUBLIC CryptonightR_instruction_mov151 +PUBLIC CryptonightR_instruction_mov152 +PUBLIC CryptonightR_instruction_mov153 +PUBLIC CryptonightR_instruction_mov154 +PUBLIC CryptonightR_instruction_mov155 +PUBLIC CryptonightR_instruction_mov156 +PUBLIC CryptonightR_instruction_mov157 +PUBLIC CryptonightR_instruction_mov158 +PUBLIC CryptonightR_instruction_mov159 +PUBLIC CryptonightR_instruction_mov160 +PUBLIC CryptonightR_instruction_mov161 +PUBLIC CryptonightR_instruction_mov162 +PUBLIC CryptonightR_instruction_mov163 +PUBLIC CryptonightR_instruction_mov164 +PUBLIC CryptonightR_instruction_mov165 +PUBLIC CryptonightR_instruction_mov166 +PUBLIC CryptonightR_instruction_mov167 +PUBLIC CryptonightR_instruction_mov168 +PUBLIC CryptonightR_instruction_mov169 +PUBLIC CryptonightR_instruction_mov170 +PUBLIC CryptonightR_instruction_mov171 +PUBLIC CryptonightR_instruction_mov172 +PUBLIC CryptonightR_instruction_mov173 +PUBLIC CryptonightR_instruction_mov174 +PUBLIC CryptonightR_instruction_mov175 +PUBLIC CryptonightR_instruction_mov176 +PUBLIC CryptonightR_instruction_mov177 +PUBLIC CryptonightR_instruction_mov178 +PUBLIC CryptonightR_instruction_mov179 +PUBLIC CryptonightR_instruction_mov180 +PUBLIC CryptonightR_instruction_mov181 +PUBLIC CryptonightR_instruction_mov182 +PUBLIC CryptonightR_instruction_mov183 +PUBLIC CryptonightR_instruction_mov184 +PUBLIC CryptonightR_instruction_mov185 +PUBLIC CryptonightR_instruction_mov186 +PUBLIC CryptonightR_instruction_mov187 +PUBLIC CryptonightR_instruction_mov188 +PUBLIC CryptonightR_instruction_mov189 +PUBLIC CryptonightR_instruction_mov190 +PUBLIC CryptonightR_instruction_mov191 +PUBLIC CryptonightR_instruction_mov192 +PUBLIC CryptonightR_instruction_mov193 +PUBLIC CryptonightR_instruction_mov194 +PUBLIC CryptonightR_instruction_mov195 +PUBLIC CryptonightR_instruction_mov196 +PUBLIC CryptonightR_instruction_mov197 +PUBLIC CryptonightR_instruction_mov198 +PUBLIC CryptonightR_instruction_mov199 +PUBLIC CryptonightR_instruction_mov200 +PUBLIC CryptonightR_instruction_mov201 +PUBLIC CryptonightR_instruction_mov202 +PUBLIC CryptonightR_instruction_mov203 +PUBLIC CryptonightR_instruction_mov204 +PUBLIC CryptonightR_instruction_mov205 +PUBLIC CryptonightR_instruction_mov206 +PUBLIC CryptonightR_instruction_mov207 +PUBLIC CryptonightR_instruction_mov208 +PUBLIC CryptonightR_instruction_mov209 +PUBLIC CryptonightR_instruction_mov210 +PUBLIC CryptonightR_instruction_mov211 +PUBLIC CryptonightR_instruction_mov212 +PUBLIC CryptonightR_instruction_mov213 +PUBLIC CryptonightR_instruction_mov214 +PUBLIC CryptonightR_instruction_mov215 +PUBLIC CryptonightR_instruction_mov216 +PUBLIC CryptonightR_instruction_mov217 +PUBLIC CryptonightR_instruction_mov218 +PUBLIC CryptonightR_instruction_mov219 +PUBLIC CryptonightR_instruction_mov220 +PUBLIC CryptonightR_instruction_mov221 +PUBLIC CryptonightR_instruction_mov222 +PUBLIC CryptonightR_instruction_mov223 +PUBLIC CryptonightR_instruction_mov224 +PUBLIC CryptonightR_instruction_mov225 +PUBLIC CryptonightR_instruction_mov226 +PUBLIC CryptonightR_instruction_mov227 +PUBLIC CryptonightR_instruction_mov228 +PUBLIC CryptonightR_instruction_mov229 +PUBLIC CryptonightR_instruction_mov230 +PUBLIC CryptonightR_instruction_mov231 +PUBLIC CryptonightR_instruction_mov232 +PUBLIC CryptonightR_instruction_mov233 +PUBLIC CryptonightR_instruction_mov234 +PUBLIC CryptonightR_instruction_mov235 +PUBLIC CryptonightR_instruction_mov236 +PUBLIC CryptonightR_instruction_mov237 +PUBLIC CryptonightR_instruction_mov238 +PUBLIC CryptonightR_instruction_mov239 +PUBLIC CryptonightR_instruction_mov240 +PUBLIC CryptonightR_instruction_mov241 +PUBLIC CryptonightR_instruction_mov242 +PUBLIC CryptonightR_instruction_mov243 +PUBLIC CryptonightR_instruction_mov244 +PUBLIC CryptonightR_instruction_mov245 +PUBLIC CryptonightR_instruction_mov246 +PUBLIC CryptonightR_instruction_mov247 +PUBLIC CryptonightR_instruction_mov248 +PUBLIC CryptonightR_instruction_mov249 +PUBLIC CryptonightR_instruction_mov250 +PUBLIC CryptonightR_instruction_mov251 +PUBLIC CryptonightR_instruction_mov252 +PUBLIC CryptonightR_instruction_mov253 +PUBLIC CryptonightR_instruction_mov254 +PUBLIC CryptonightR_instruction_mov255 +PUBLIC CryptonightR_instruction_mov256 + +INCLUDE CryptonightWOW_template_win.inc +INCLUDE CryptonightR_template_win.inc + +CryptonightR_instruction0: + imul rbx, rbx +CryptonightR_instruction1: + imul rbx, rbx +CryptonightR_instruction2: + imul rbx, rbx +CryptonightR_instruction3: + add rbx, r9 + add rbx, 2147483647 +CryptonightR_instruction4: + sub rbx, r9 +CryptonightR_instruction5: + ror ebx, cl +CryptonightR_instruction6: + rol ebx, cl +CryptonightR_instruction7: + xor rbx, r9 +CryptonightR_instruction8: + imul rsi, rbx +CryptonightR_instruction9: + imul rsi, rbx +CryptonightR_instruction10: + imul rsi, rbx +CryptonightR_instruction11: + add rsi, rbx + add rsi, 2147483647 +CryptonightR_instruction12: + sub rsi, rbx +CryptonightR_instruction13: + ror esi, cl +CryptonightR_instruction14: + rol esi, cl +CryptonightR_instruction15: + xor rsi, rbx +CryptonightR_instruction16: + imul rdi, rbx +CryptonightR_instruction17: + imul rdi, rbx +CryptonightR_instruction18: + imul rdi, rbx +CryptonightR_instruction19: + add rdi, rbx + add rdi, 2147483647 +CryptonightR_instruction20: + sub rdi, rbx +CryptonightR_instruction21: + ror edi, cl +CryptonightR_instruction22: + rol edi, cl +CryptonightR_instruction23: + xor rdi, rbx +CryptonightR_instruction24: + imul rbp, rbx +CryptonightR_instruction25: + imul rbp, rbx +CryptonightR_instruction26: + imul rbp, rbx +CryptonightR_instruction27: + add rbp, rbx + add rbp, 2147483647 +CryptonightR_instruction28: + sub rbp, rbx +CryptonightR_instruction29: + ror ebp, cl +CryptonightR_instruction30: + rol ebp, cl +CryptonightR_instruction31: + xor rbp, rbx +CryptonightR_instruction32: + imul rbx, rsi +CryptonightR_instruction33: + imul rbx, rsi +CryptonightR_instruction34: + imul rbx, rsi +CryptonightR_instruction35: + add rbx, rsi + add rbx, 2147483647 +CryptonightR_instruction36: + sub rbx, rsi +CryptonightR_instruction37: + ror ebx, cl +CryptonightR_instruction38: + rol ebx, cl +CryptonightR_instruction39: + xor rbx, rsi +CryptonightR_instruction40: + imul rsi, rsi +CryptonightR_instruction41: + imul rsi, rsi +CryptonightR_instruction42: + imul rsi, rsi +CryptonightR_instruction43: + add rsi, r9 + add rsi, 2147483647 +CryptonightR_instruction44: + sub rsi, r9 +CryptonightR_instruction45: + ror esi, cl +CryptonightR_instruction46: + rol esi, cl +CryptonightR_instruction47: + xor rsi, r9 +CryptonightR_instruction48: + imul rdi, rsi +CryptonightR_instruction49: + imul rdi, rsi +CryptonightR_instruction50: + imul rdi, rsi +CryptonightR_instruction51: + add rdi, rsi + add rdi, 2147483647 +CryptonightR_instruction52: + sub rdi, rsi +CryptonightR_instruction53: + ror edi, cl +CryptonightR_instruction54: + rol edi, cl +CryptonightR_instruction55: + xor rdi, rsi +CryptonightR_instruction56: + imul rbp, rsi +CryptonightR_instruction57: + imul rbp, rsi +CryptonightR_instruction58: + imul rbp, rsi +CryptonightR_instruction59: + add rbp, rsi + add rbp, 2147483647 +CryptonightR_instruction60: + sub rbp, rsi +CryptonightR_instruction61: + ror ebp, cl +CryptonightR_instruction62: + rol ebp, cl +CryptonightR_instruction63: + xor rbp, rsi +CryptonightR_instruction64: + imul rbx, rdi +CryptonightR_instruction65: + imul rbx, rdi +CryptonightR_instruction66: + imul rbx, rdi +CryptonightR_instruction67: + add rbx, rdi + add rbx, 2147483647 +CryptonightR_instruction68: + sub rbx, rdi +CryptonightR_instruction69: + ror ebx, cl +CryptonightR_instruction70: + rol ebx, cl +CryptonightR_instruction71: + xor rbx, rdi +CryptonightR_instruction72: + imul rsi, rdi +CryptonightR_instruction73: + imul rsi, rdi +CryptonightR_instruction74: + imul rsi, rdi +CryptonightR_instruction75: + add rsi, rdi + add rsi, 2147483647 +CryptonightR_instruction76: + sub rsi, rdi +CryptonightR_instruction77: + ror esi, cl +CryptonightR_instruction78: + rol esi, cl +CryptonightR_instruction79: + xor rsi, rdi +CryptonightR_instruction80: + imul rdi, rdi +CryptonightR_instruction81: + imul rdi, rdi +CryptonightR_instruction82: + imul rdi, rdi +CryptonightR_instruction83: + add rdi, r9 + add rdi, 2147483647 +CryptonightR_instruction84: + sub rdi, r9 +CryptonightR_instruction85: + ror edi, cl +CryptonightR_instruction86: + rol edi, cl +CryptonightR_instruction87: + xor rdi, r9 +CryptonightR_instruction88: + imul rbp, rdi +CryptonightR_instruction89: + imul rbp, rdi +CryptonightR_instruction90: + imul rbp, rdi +CryptonightR_instruction91: + add rbp, rdi + add rbp, 2147483647 +CryptonightR_instruction92: + sub rbp, rdi +CryptonightR_instruction93: + ror ebp, cl +CryptonightR_instruction94: + rol ebp, cl +CryptonightR_instruction95: + xor rbp, rdi +CryptonightR_instruction96: + imul rbx, rbp +CryptonightR_instruction97: + imul rbx, rbp +CryptonightR_instruction98: + imul rbx, rbp +CryptonightR_instruction99: + add rbx, rbp + add rbx, 2147483647 +CryptonightR_instruction100: + sub rbx, rbp +CryptonightR_instruction101: + ror ebx, cl +CryptonightR_instruction102: + rol ebx, cl +CryptonightR_instruction103: + xor rbx, rbp +CryptonightR_instruction104: + imul rsi, rbp +CryptonightR_instruction105: + imul rsi, rbp +CryptonightR_instruction106: + imul rsi, rbp +CryptonightR_instruction107: + add rsi, rbp + add rsi, 2147483647 +CryptonightR_instruction108: + sub rsi, rbp +CryptonightR_instruction109: + ror esi, cl +CryptonightR_instruction110: + rol esi, cl +CryptonightR_instruction111: + xor rsi, rbp +CryptonightR_instruction112: + imul rdi, rbp +CryptonightR_instruction113: + imul rdi, rbp +CryptonightR_instruction114: + imul rdi, rbp +CryptonightR_instruction115: + add rdi, rbp + add rdi, 2147483647 +CryptonightR_instruction116: + sub rdi, rbp +CryptonightR_instruction117: + ror edi, cl +CryptonightR_instruction118: + rol edi, cl +CryptonightR_instruction119: + xor rdi, rbp +CryptonightR_instruction120: + imul rbp, rbp +CryptonightR_instruction121: + imul rbp, rbp +CryptonightR_instruction122: + imul rbp, rbp +CryptonightR_instruction123: + add rbp, r9 + add rbp, 2147483647 +CryptonightR_instruction124: + sub rbp, r9 +CryptonightR_instruction125: + ror ebp, cl +CryptonightR_instruction126: + rol ebp, cl +CryptonightR_instruction127: + xor rbp, r9 +CryptonightR_instruction128: + imul rbx, rsp +CryptonightR_instruction129: + imul rbx, rsp +CryptonightR_instruction130: + imul rbx, rsp +CryptonightR_instruction131: + add rbx, rsp + add rbx, 2147483647 +CryptonightR_instruction132: + sub rbx, rsp +CryptonightR_instruction133: + ror ebx, cl +CryptonightR_instruction134: + rol ebx, cl +CryptonightR_instruction135: + xor rbx, rsp +CryptonightR_instruction136: + imul rsi, rsp +CryptonightR_instruction137: + imul rsi, rsp +CryptonightR_instruction138: + imul rsi, rsp +CryptonightR_instruction139: + add rsi, rsp + add rsi, 2147483647 +CryptonightR_instruction140: + sub rsi, rsp +CryptonightR_instruction141: + ror esi, cl +CryptonightR_instruction142: + rol esi, cl +CryptonightR_instruction143: + xor rsi, rsp +CryptonightR_instruction144: + imul rdi, rsp +CryptonightR_instruction145: + imul rdi, rsp +CryptonightR_instruction146: + imul rdi, rsp +CryptonightR_instruction147: + add rdi, rsp + add rdi, 2147483647 +CryptonightR_instruction148: + sub rdi, rsp +CryptonightR_instruction149: + ror edi, cl +CryptonightR_instruction150: + rol edi, cl +CryptonightR_instruction151: + xor rdi, rsp +CryptonightR_instruction152: + imul rbp, rsp +CryptonightR_instruction153: + imul rbp, rsp +CryptonightR_instruction154: + imul rbp, rsp +CryptonightR_instruction155: + add rbp, rsp + add rbp, 2147483647 +CryptonightR_instruction156: + sub rbp, rsp +CryptonightR_instruction157: + ror ebp, cl +CryptonightR_instruction158: + rol ebp, cl +CryptonightR_instruction159: + xor rbp, rsp +CryptonightR_instruction160: + imul rbx, r15 +CryptonightR_instruction161: + imul rbx, r15 +CryptonightR_instruction162: + imul rbx, r15 +CryptonightR_instruction163: + add rbx, r15 + add rbx, 2147483647 +CryptonightR_instruction164: + sub rbx, r15 +CryptonightR_instruction165: + ror ebx, cl +CryptonightR_instruction166: + rol ebx, cl +CryptonightR_instruction167: + xor rbx, r15 +CryptonightR_instruction168: + imul rsi, r15 +CryptonightR_instruction169: + imul rsi, r15 +CryptonightR_instruction170: + imul rsi, r15 +CryptonightR_instruction171: + add rsi, r15 + add rsi, 2147483647 +CryptonightR_instruction172: + sub rsi, r15 +CryptonightR_instruction173: + ror esi, cl +CryptonightR_instruction174: + rol esi, cl +CryptonightR_instruction175: + xor rsi, r15 +CryptonightR_instruction176: + imul rdi, r15 +CryptonightR_instruction177: + imul rdi, r15 +CryptonightR_instruction178: + imul rdi, r15 +CryptonightR_instruction179: + add rdi, r15 + add rdi, 2147483647 +CryptonightR_instruction180: + sub rdi, r15 +CryptonightR_instruction181: + ror edi, cl +CryptonightR_instruction182: + rol edi, cl +CryptonightR_instruction183: + xor rdi, r15 +CryptonightR_instruction184: + imul rbp, r15 +CryptonightR_instruction185: + imul rbp, r15 +CryptonightR_instruction186: + imul rbp, r15 +CryptonightR_instruction187: + add rbp, r15 + add rbp, 2147483647 +CryptonightR_instruction188: + sub rbp, r15 +CryptonightR_instruction189: + ror ebp, cl +CryptonightR_instruction190: + rol ebp, cl +CryptonightR_instruction191: + xor rbp, r15 +CryptonightR_instruction192: + imul rbx, rax +CryptonightR_instruction193: + imul rbx, rax +CryptonightR_instruction194: + imul rbx, rax +CryptonightR_instruction195: + add rbx, rax + add rbx, 2147483647 +CryptonightR_instruction196: + sub rbx, rax +CryptonightR_instruction197: + ror ebx, cl +CryptonightR_instruction198: + rol ebx, cl +CryptonightR_instruction199: + xor rbx, rax +CryptonightR_instruction200: + imul rsi, rax +CryptonightR_instruction201: + imul rsi, rax +CryptonightR_instruction202: + imul rsi, rax +CryptonightR_instruction203: + add rsi, rax + add rsi, 2147483647 +CryptonightR_instruction204: + sub rsi, rax +CryptonightR_instruction205: + ror esi, cl +CryptonightR_instruction206: + rol esi, cl +CryptonightR_instruction207: + xor rsi, rax +CryptonightR_instruction208: + imul rdi, rax +CryptonightR_instruction209: + imul rdi, rax +CryptonightR_instruction210: + imul rdi, rax +CryptonightR_instruction211: + add rdi, rax + add rdi, 2147483647 +CryptonightR_instruction212: + sub rdi, rax +CryptonightR_instruction213: + ror edi, cl +CryptonightR_instruction214: + rol edi, cl +CryptonightR_instruction215: + xor rdi, rax +CryptonightR_instruction216: + imul rbp, rax +CryptonightR_instruction217: + imul rbp, rax +CryptonightR_instruction218: + imul rbp, rax +CryptonightR_instruction219: + add rbp, rax + add rbp, 2147483647 +CryptonightR_instruction220: + sub rbp, rax +CryptonightR_instruction221: + ror ebp, cl +CryptonightR_instruction222: + rol ebp, cl +CryptonightR_instruction223: + xor rbp, rax +CryptonightR_instruction224: + imul rbx, rdx +CryptonightR_instruction225: + imul rbx, rdx +CryptonightR_instruction226: + imul rbx, rdx +CryptonightR_instruction227: + add rbx, rdx + add rbx, 2147483647 +CryptonightR_instruction228: + sub rbx, rdx +CryptonightR_instruction229: + ror ebx, cl +CryptonightR_instruction230: + rol ebx, cl +CryptonightR_instruction231: + xor rbx, rdx +CryptonightR_instruction232: + imul rsi, rdx +CryptonightR_instruction233: + imul rsi, rdx +CryptonightR_instruction234: + imul rsi, rdx +CryptonightR_instruction235: + add rsi, rdx + add rsi, 2147483647 +CryptonightR_instruction236: + sub rsi, rdx +CryptonightR_instruction237: + ror esi, cl +CryptonightR_instruction238: + rol esi, cl +CryptonightR_instruction239: + xor rsi, rdx +CryptonightR_instruction240: + imul rdi, rdx +CryptonightR_instruction241: + imul rdi, rdx +CryptonightR_instruction242: + imul rdi, rdx +CryptonightR_instruction243: + add rdi, rdx + add rdi, 2147483647 +CryptonightR_instruction244: + sub rdi, rdx +CryptonightR_instruction245: + ror edi, cl +CryptonightR_instruction246: + rol edi, cl +CryptonightR_instruction247: + xor rdi, rdx +CryptonightR_instruction248: + imul rbp, rdx +CryptonightR_instruction249: + imul rbp, rdx +CryptonightR_instruction250: + imul rbp, rdx +CryptonightR_instruction251: + add rbp, rdx + add rbp, 2147483647 +CryptonightR_instruction252: + sub rbp, rdx +CryptonightR_instruction253: + ror ebp, cl +CryptonightR_instruction254: + rol ebp, cl +CryptonightR_instruction255: + xor rbp, rdx +CryptonightR_instruction256: + imul rbx, rbx +CryptonightR_instruction_mov0: + +CryptonightR_instruction_mov1: + +CryptonightR_instruction_mov2: + +CryptonightR_instruction_mov3: + +CryptonightR_instruction_mov4: + +CryptonightR_instruction_mov5: + mov rcx, rbx +CryptonightR_instruction_mov6: + mov rcx, rbx +CryptonightR_instruction_mov7: + +CryptonightR_instruction_mov8: + +CryptonightR_instruction_mov9: + +CryptonightR_instruction_mov10: + +CryptonightR_instruction_mov11: + +CryptonightR_instruction_mov12: + +CryptonightR_instruction_mov13: + mov rcx, rbx +CryptonightR_instruction_mov14: + mov rcx, rbx +CryptonightR_instruction_mov15: + +CryptonightR_instruction_mov16: + +CryptonightR_instruction_mov17: + +CryptonightR_instruction_mov18: + +CryptonightR_instruction_mov19: + +CryptonightR_instruction_mov20: + +CryptonightR_instruction_mov21: + mov rcx, rbx +CryptonightR_instruction_mov22: + mov rcx, rbx +CryptonightR_instruction_mov23: + +CryptonightR_instruction_mov24: + +CryptonightR_instruction_mov25: + +CryptonightR_instruction_mov26: + +CryptonightR_instruction_mov27: + +CryptonightR_instruction_mov28: + +CryptonightR_instruction_mov29: + mov rcx, rbx +CryptonightR_instruction_mov30: + mov rcx, rbx +CryptonightR_instruction_mov31: + +CryptonightR_instruction_mov32: + +CryptonightR_instruction_mov33: + +CryptonightR_instruction_mov34: + +CryptonightR_instruction_mov35: + +CryptonightR_instruction_mov36: + +CryptonightR_instruction_mov37: + mov rcx, rsi +CryptonightR_instruction_mov38: + mov rcx, rsi +CryptonightR_instruction_mov39: + +CryptonightR_instruction_mov40: + +CryptonightR_instruction_mov41: + +CryptonightR_instruction_mov42: + +CryptonightR_instruction_mov43: + +CryptonightR_instruction_mov44: + +CryptonightR_instruction_mov45: + mov rcx, rsi +CryptonightR_instruction_mov46: + mov rcx, rsi +CryptonightR_instruction_mov47: + +CryptonightR_instruction_mov48: + +CryptonightR_instruction_mov49: + +CryptonightR_instruction_mov50: + +CryptonightR_instruction_mov51: + +CryptonightR_instruction_mov52: + +CryptonightR_instruction_mov53: + mov rcx, rsi +CryptonightR_instruction_mov54: + mov rcx, rsi +CryptonightR_instruction_mov55: + +CryptonightR_instruction_mov56: + +CryptonightR_instruction_mov57: + +CryptonightR_instruction_mov58: + +CryptonightR_instruction_mov59: + +CryptonightR_instruction_mov60: + +CryptonightR_instruction_mov61: + mov rcx, rsi +CryptonightR_instruction_mov62: + mov rcx, rsi +CryptonightR_instruction_mov63: + +CryptonightR_instruction_mov64: + +CryptonightR_instruction_mov65: + +CryptonightR_instruction_mov66: + +CryptonightR_instruction_mov67: + +CryptonightR_instruction_mov68: + +CryptonightR_instruction_mov69: + mov rcx, rdi +CryptonightR_instruction_mov70: + mov rcx, rdi +CryptonightR_instruction_mov71: + +CryptonightR_instruction_mov72: + +CryptonightR_instruction_mov73: + +CryptonightR_instruction_mov74: + +CryptonightR_instruction_mov75: + +CryptonightR_instruction_mov76: + +CryptonightR_instruction_mov77: + mov rcx, rdi +CryptonightR_instruction_mov78: + mov rcx, rdi +CryptonightR_instruction_mov79: + +CryptonightR_instruction_mov80: + +CryptonightR_instruction_mov81: + +CryptonightR_instruction_mov82: + +CryptonightR_instruction_mov83: + +CryptonightR_instruction_mov84: + +CryptonightR_instruction_mov85: + mov rcx, rdi +CryptonightR_instruction_mov86: + mov rcx, rdi +CryptonightR_instruction_mov87: + +CryptonightR_instruction_mov88: + +CryptonightR_instruction_mov89: + +CryptonightR_instruction_mov90: + +CryptonightR_instruction_mov91: + +CryptonightR_instruction_mov92: + +CryptonightR_instruction_mov93: + mov rcx, rdi +CryptonightR_instruction_mov94: + mov rcx, rdi +CryptonightR_instruction_mov95: + +CryptonightR_instruction_mov96: + +CryptonightR_instruction_mov97: + +CryptonightR_instruction_mov98: + +CryptonightR_instruction_mov99: + +CryptonightR_instruction_mov100: + +CryptonightR_instruction_mov101: + mov rcx, rbp +CryptonightR_instruction_mov102: + mov rcx, rbp +CryptonightR_instruction_mov103: + +CryptonightR_instruction_mov104: + +CryptonightR_instruction_mov105: + +CryptonightR_instruction_mov106: + +CryptonightR_instruction_mov107: + +CryptonightR_instruction_mov108: + +CryptonightR_instruction_mov109: + mov rcx, rbp +CryptonightR_instruction_mov110: + mov rcx, rbp +CryptonightR_instruction_mov111: + +CryptonightR_instruction_mov112: + +CryptonightR_instruction_mov113: + +CryptonightR_instruction_mov114: + +CryptonightR_instruction_mov115: + +CryptonightR_instruction_mov116: + +CryptonightR_instruction_mov117: + mov rcx, rbp +CryptonightR_instruction_mov118: + mov rcx, rbp +CryptonightR_instruction_mov119: + +CryptonightR_instruction_mov120: + +CryptonightR_instruction_mov121: + +CryptonightR_instruction_mov122: + +CryptonightR_instruction_mov123: + +CryptonightR_instruction_mov124: + +CryptonightR_instruction_mov125: + mov rcx, rbp +CryptonightR_instruction_mov126: + mov rcx, rbp +CryptonightR_instruction_mov127: + +CryptonightR_instruction_mov128: + +CryptonightR_instruction_mov129: + +CryptonightR_instruction_mov130: + +CryptonightR_instruction_mov131: + +CryptonightR_instruction_mov132: + +CryptonightR_instruction_mov133: + mov rcx, rsp +CryptonightR_instruction_mov134: + mov rcx, rsp +CryptonightR_instruction_mov135: + +CryptonightR_instruction_mov136: + +CryptonightR_instruction_mov137: + +CryptonightR_instruction_mov138: + +CryptonightR_instruction_mov139: + +CryptonightR_instruction_mov140: + +CryptonightR_instruction_mov141: + mov rcx, rsp +CryptonightR_instruction_mov142: + mov rcx, rsp +CryptonightR_instruction_mov143: + +CryptonightR_instruction_mov144: + +CryptonightR_instruction_mov145: + +CryptonightR_instruction_mov146: + +CryptonightR_instruction_mov147: + +CryptonightR_instruction_mov148: + +CryptonightR_instruction_mov149: + mov rcx, rsp +CryptonightR_instruction_mov150: + mov rcx, rsp +CryptonightR_instruction_mov151: + +CryptonightR_instruction_mov152: + +CryptonightR_instruction_mov153: + +CryptonightR_instruction_mov154: + +CryptonightR_instruction_mov155: + +CryptonightR_instruction_mov156: + +CryptonightR_instruction_mov157: + mov rcx, rsp +CryptonightR_instruction_mov158: + mov rcx, rsp +CryptonightR_instruction_mov159: + +CryptonightR_instruction_mov160: + +CryptonightR_instruction_mov161: + +CryptonightR_instruction_mov162: + +CryptonightR_instruction_mov163: + +CryptonightR_instruction_mov164: + +CryptonightR_instruction_mov165: + mov rcx, r15 +CryptonightR_instruction_mov166: + mov rcx, r15 +CryptonightR_instruction_mov167: + +CryptonightR_instruction_mov168: + +CryptonightR_instruction_mov169: + +CryptonightR_instruction_mov170: + +CryptonightR_instruction_mov171: + +CryptonightR_instruction_mov172: + +CryptonightR_instruction_mov173: + mov rcx, r15 +CryptonightR_instruction_mov174: + mov rcx, r15 +CryptonightR_instruction_mov175: + +CryptonightR_instruction_mov176: + +CryptonightR_instruction_mov177: + +CryptonightR_instruction_mov178: + +CryptonightR_instruction_mov179: + +CryptonightR_instruction_mov180: + +CryptonightR_instruction_mov181: + mov rcx, r15 +CryptonightR_instruction_mov182: + mov rcx, r15 +CryptonightR_instruction_mov183: + +CryptonightR_instruction_mov184: + +CryptonightR_instruction_mov185: + +CryptonightR_instruction_mov186: + +CryptonightR_instruction_mov187: + +CryptonightR_instruction_mov188: + +CryptonightR_instruction_mov189: + mov rcx, r15 +CryptonightR_instruction_mov190: + mov rcx, r15 +CryptonightR_instruction_mov191: + +CryptonightR_instruction_mov192: + +CryptonightR_instruction_mov193: + +CryptonightR_instruction_mov194: + +CryptonightR_instruction_mov195: + +CryptonightR_instruction_mov196: + +CryptonightR_instruction_mov197: + mov rcx, rax +CryptonightR_instruction_mov198: + mov rcx, rax +CryptonightR_instruction_mov199: + +CryptonightR_instruction_mov200: + +CryptonightR_instruction_mov201: + +CryptonightR_instruction_mov202: + +CryptonightR_instruction_mov203: + +CryptonightR_instruction_mov204: + +CryptonightR_instruction_mov205: + mov rcx, rax +CryptonightR_instruction_mov206: + mov rcx, rax +CryptonightR_instruction_mov207: + +CryptonightR_instruction_mov208: + +CryptonightR_instruction_mov209: + +CryptonightR_instruction_mov210: + +CryptonightR_instruction_mov211: + +CryptonightR_instruction_mov212: + +CryptonightR_instruction_mov213: + mov rcx, rax +CryptonightR_instruction_mov214: + mov rcx, rax +CryptonightR_instruction_mov215: + +CryptonightR_instruction_mov216: + +CryptonightR_instruction_mov217: + +CryptonightR_instruction_mov218: + +CryptonightR_instruction_mov219: + +CryptonightR_instruction_mov220: + +CryptonightR_instruction_mov221: + mov rcx, rax +CryptonightR_instruction_mov222: + mov rcx, rax +CryptonightR_instruction_mov223: + +CryptonightR_instruction_mov224: + +CryptonightR_instruction_mov225: + +CryptonightR_instruction_mov226: + +CryptonightR_instruction_mov227: + +CryptonightR_instruction_mov228: + +CryptonightR_instruction_mov229: + mov rcx, rdx +CryptonightR_instruction_mov230: + mov rcx, rdx +CryptonightR_instruction_mov231: + +CryptonightR_instruction_mov232: + +CryptonightR_instruction_mov233: + +CryptonightR_instruction_mov234: + +CryptonightR_instruction_mov235: + +CryptonightR_instruction_mov236: + +CryptonightR_instruction_mov237: + mov rcx, rdx +CryptonightR_instruction_mov238: + mov rcx, rdx +CryptonightR_instruction_mov239: + +CryptonightR_instruction_mov240: + +CryptonightR_instruction_mov241: + +CryptonightR_instruction_mov242: + +CryptonightR_instruction_mov243: + +CryptonightR_instruction_mov244: + +CryptonightR_instruction_mov245: + mov rcx, rdx +CryptonightR_instruction_mov246: + mov rcx, rdx +CryptonightR_instruction_mov247: + +CryptonightR_instruction_mov248: + +CryptonightR_instruction_mov249: + +CryptonightR_instruction_mov250: + +CryptonightR_instruction_mov251: + +CryptonightR_instruction_mov252: + +CryptonightR_instruction_mov253: + mov rcx, rdx +CryptonightR_instruction_mov254: + mov rcx, rdx +CryptonightR_instruction_mov255: + +CryptonightR_instruction_mov256: + +_TEXT_CN_TEMPLATE ENDS +END diff --git a/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.h b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.h new file mode 100644 index 000000000..c2054705b --- /dev/null +++ b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.h @@ -0,0 +1,1063 @@ +// Auto-generated file, do not edit + +extern "C" +{ + void CryptonightWOW_template_part1(); + void CryptonightWOW_template_mainloop(); + void CryptonightWOW_template_part2(); + void CryptonightWOW_template_part3(); + void CryptonightWOW_template_end(); + void CryptonightWOW_template_double_part1(); + void CryptonightWOW_template_double_mainloop(); + void CryptonightWOW_template_double_part2(); + void CryptonightWOW_template_double_part3(); + void CryptonightWOW_template_double_part4(); + void CryptonightWOW_template_double_end(); + + void CryptonightR_template_part1(); + void CryptonightR_template_mainloop(); + void CryptonightR_template_part2(); + void CryptonightR_template_part3(); + void CryptonightR_template_end(); + void CryptonightR_template_double_part1(); + void CryptonightR_template_double_mainloop(); + void CryptonightR_template_double_part2(); + void CryptonightR_template_double_part3(); + void CryptonightR_template_double_part4(); + void CryptonightR_template_double_end(); + + void CryptonightR_instruction0(); + void CryptonightR_instruction1(); + void CryptonightR_instruction2(); + void CryptonightR_instruction3(); + void CryptonightR_instruction4(); + void CryptonightR_instruction5(); + void CryptonightR_instruction6(); + void CryptonightR_instruction7(); + void CryptonightR_instruction8(); + void CryptonightR_instruction9(); + void CryptonightR_instruction10(); + void CryptonightR_instruction11(); + void CryptonightR_instruction12(); + void CryptonightR_instruction13(); + void CryptonightR_instruction14(); + void CryptonightR_instruction15(); + void CryptonightR_instruction16(); + void CryptonightR_instruction17(); + void CryptonightR_instruction18(); + void CryptonightR_instruction19(); + void CryptonightR_instruction20(); + void CryptonightR_instruction21(); + void CryptonightR_instruction22(); + void CryptonightR_instruction23(); + void CryptonightR_instruction24(); + void CryptonightR_instruction25(); + void CryptonightR_instruction26(); + void CryptonightR_instruction27(); + void CryptonightR_instruction28(); + void CryptonightR_instruction29(); + void CryptonightR_instruction30(); + void CryptonightR_instruction31(); + void CryptonightR_instruction32(); + void CryptonightR_instruction33(); + void CryptonightR_instruction34(); + void CryptonightR_instruction35(); + void CryptonightR_instruction36(); + void CryptonightR_instruction37(); + void CryptonightR_instruction38(); + void CryptonightR_instruction39(); + void CryptonightR_instruction40(); + void CryptonightR_instruction41(); + void CryptonightR_instruction42(); + void CryptonightR_instruction43(); + void CryptonightR_instruction44(); + void CryptonightR_instruction45(); + void CryptonightR_instruction46(); + void CryptonightR_instruction47(); + void CryptonightR_instruction48(); + void CryptonightR_instruction49(); + void CryptonightR_instruction50(); + void CryptonightR_instruction51(); + void CryptonightR_instruction52(); + void CryptonightR_instruction53(); + void CryptonightR_instruction54(); + void CryptonightR_instruction55(); + void CryptonightR_instruction56(); + void CryptonightR_instruction57(); + void CryptonightR_instruction58(); + void CryptonightR_instruction59(); + void CryptonightR_instruction60(); + void CryptonightR_instruction61(); + void CryptonightR_instruction62(); + void CryptonightR_instruction63(); + void CryptonightR_instruction64(); + void CryptonightR_instruction65(); + void CryptonightR_instruction66(); + void CryptonightR_instruction67(); + void CryptonightR_instruction68(); + void CryptonightR_instruction69(); + void CryptonightR_instruction70(); + void CryptonightR_instruction71(); + void CryptonightR_instruction72(); + void CryptonightR_instruction73(); + void CryptonightR_instruction74(); + void CryptonightR_instruction75(); + void CryptonightR_instruction76(); + void CryptonightR_instruction77(); + void CryptonightR_instruction78(); + void CryptonightR_instruction79(); + void CryptonightR_instruction80(); + void CryptonightR_instruction81(); + void CryptonightR_instruction82(); + void CryptonightR_instruction83(); + void CryptonightR_instruction84(); + void CryptonightR_instruction85(); + void CryptonightR_instruction86(); + void CryptonightR_instruction87(); + void CryptonightR_instruction88(); + void CryptonightR_instruction89(); + void CryptonightR_instruction90(); + void CryptonightR_instruction91(); + void CryptonightR_instruction92(); + void CryptonightR_instruction93(); + void CryptonightR_instruction94(); + void CryptonightR_instruction95(); + void CryptonightR_instruction96(); + void CryptonightR_instruction97(); + void CryptonightR_instruction98(); + void CryptonightR_instruction99(); + void CryptonightR_instruction100(); + void CryptonightR_instruction101(); + void CryptonightR_instruction102(); + void CryptonightR_instruction103(); + void CryptonightR_instruction104(); + void CryptonightR_instruction105(); + void CryptonightR_instruction106(); + void CryptonightR_instruction107(); + void CryptonightR_instruction108(); + void CryptonightR_instruction109(); + void CryptonightR_instruction110(); + void CryptonightR_instruction111(); + void CryptonightR_instruction112(); + void CryptonightR_instruction113(); + void CryptonightR_instruction114(); + void CryptonightR_instruction115(); + void CryptonightR_instruction116(); + void CryptonightR_instruction117(); + void CryptonightR_instruction118(); + void CryptonightR_instruction119(); + void CryptonightR_instruction120(); + void CryptonightR_instruction121(); + void CryptonightR_instruction122(); + void CryptonightR_instruction123(); + void CryptonightR_instruction124(); + void CryptonightR_instruction125(); + void CryptonightR_instruction126(); + void CryptonightR_instruction127(); + void CryptonightR_instruction128(); + void CryptonightR_instruction129(); + void CryptonightR_instruction130(); + void CryptonightR_instruction131(); + void CryptonightR_instruction132(); + void CryptonightR_instruction133(); + void CryptonightR_instruction134(); + void CryptonightR_instruction135(); + void CryptonightR_instruction136(); + void CryptonightR_instruction137(); + void CryptonightR_instruction138(); + void CryptonightR_instruction139(); + void CryptonightR_instruction140(); + void CryptonightR_instruction141(); + void CryptonightR_instruction142(); + void CryptonightR_instruction143(); + void CryptonightR_instruction144(); + void CryptonightR_instruction145(); + void CryptonightR_instruction146(); + void CryptonightR_instruction147(); + void CryptonightR_instruction148(); + void CryptonightR_instruction149(); + void CryptonightR_instruction150(); + void CryptonightR_instruction151(); + void CryptonightR_instruction152(); + void CryptonightR_instruction153(); + void CryptonightR_instruction154(); + void CryptonightR_instruction155(); + void CryptonightR_instruction156(); + void CryptonightR_instruction157(); + void CryptonightR_instruction158(); + void CryptonightR_instruction159(); + void CryptonightR_instruction160(); + void CryptonightR_instruction161(); + void CryptonightR_instruction162(); + void CryptonightR_instruction163(); + void CryptonightR_instruction164(); + void CryptonightR_instruction165(); + void CryptonightR_instruction166(); + void CryptonightR_instruction167(); + void CryptonightR_instruction168(); + void CryptonightR_instruction169(); + void CryptonightR_instruction170(); + void CryptonightR_instruction171(); + void CryptonightR_instruction172(); + void CryptonightR_instruction173(); + void CryptonightR_instruction174(); + void CryptonightR_instruction175(); + void CryptonightR_instruction176(); + void CryptonightR_instruction177(); + void CryptonightR_instruction178(); + void CryptonightR_instruction179(); + void CryptonightR_instruction180(); + void CryptonightR_instruction181(); + void CryptonightR_instruction182(); + void CryptonightR_instruction183(); + void CryptonightR_instruction184(); + void CryptonightR_instruction185(); + void CryptonightR_instruction186(); + void CryptonightR_instruction187(); + void CryptonightR_instruction188(); + void CryptonightR_instruction189(); + void CryptonightR_instruction190(); + void CryptonightR_instruction191(); + void CryptonightR_instruction192(); + void CryptonightR_instruction193(); + void CryptonightR_instruction194(); + void CryptonightR_instruction195(); + void CryptonightR_instruction196(); + void CryptonightR_instruction197(); + void CryptonightR_instruction198(); + void CryptonightR_instruction199(); + void CryptonightR_instruction200(); + void CryptonightR_instruction201(); + void CryptonightR_instruction202(); + void CryptonightR_instruction203(); + void CryptonightR_instruction204(); + void CryptonightR_instruction205(); + void CryptonightR_instruction206(); + void CryptonightR_instruction207(); + void CryptonightR_instruction208(); + void CryptonightR_instruction209(); + void CryptonightR_instruction210(); + void CryptonightR_instruction211(); + void CryptonightR_instruction212(); + void CryptonightR_instruction213(); + void CryptonightR_instruction214(); + void CryptonightR_instruction215(); + void CryptonightR_instruction216(); + void CryptonightR_instruction217(); + void CryptonightR_instruction218(); + void CryptonightR_instruction219(); + void CryptonightR_instruction220(); + void CryptonightR_instruction221(); + void CryptonightR_instruction222(); + void CryptonightR_instruction223(); + void CryptonightR_instruction224(); + void CryptonightR_instruction225(); + void CryptonightR_instruction226(); + void CryptonightR_instruction227(); + void CryptonightR_instruction228(); + void CryptonightR_instruction229(); + void CryptonightR_instruction230(); + void CryptonightR_instruction231(); + void CryptonightR_instruction232(); + void CryptonightR_instruction233(); + void CryptonightR_instruction234(); + void CryptonightR_instruction235(); + void CryptonightR_instruction236(); + void CryptonightR_instruction237(); + void CryptonightR_instruction238(); + void CryptonightR_instruction239(); + void CryptonightR_instruction240(); + void CryptonightR_instruction241(); + void CryptonightR_instruction242(); + void CryptonightR_instruction243(); + void CryptonightR_instruction244(); + void CryptonightR_instruction245(); + void CryptonightR_instruction246(); + void CryptonightR_instruction247(); + void CryptonightR_instruction248(); + void CryptonightR_instruction249(); + void CryptonightR_instruction250(); + void CryptonightR_instruction251(); + void CryptonightR_instruction252(); + void CryptonightR_instruction253(); + void CryptonightR_instruction254(); + void CryptonightR_instruction255(); + void CryptonightR_instruction256(); + void CryptonightR_instruction_mov0(); + void CryptonightR_instruction_mov1(); + void CryptonightR_instruction_mov2(); + void CryptonightR_instruction_mov3(); + void CryptonightR_instruction_mov4(); + void CryptonightR_instruction_mov5(); + void CryptonightR_instruction_mov6(); + void CryptonightR_instruction_mov7(); + void CryptonightR_instruction_mov8(); + void CryptonightR_instruction_mov9(); + void CryptonightR_instruction_mov10(); + void CryptonightR_instruction_mov11(); + void CryptonightR_instruction_mov12(); + void CryptonightR_instruction_mov13(); + void CryptonightR_instruction_mov14(); + void CryptonightR_instruction_mov15(); + void CryptonightR_instruction_mov16(); + void CryptonightR_instruction_mov17(); + void CryptonightR_instruction_mov18(); + void CryptonightR_instruction_mov19(); + void CryptonightR_instruction_mov20(); + void CryptonightR_instruction_mov21(); + void CryptonightR_instruction_mov22(); + void CryptonightR_instruction_mov23(); + void CryptonightR_instruction_mov24(); + void CryptonightR_instruction_mov25(); + void CryptonightR_instruction_mov26(); + void CryptonightR_instruction_mov27(); + void CryptonightR_instruction_mov28(); + void CryptonightR_instruction_mov29(); + void CryptonightR_instruction_mov30(); + void CryptonightR_instruction_mov31(); + void CryptonightR_instruction_mov32(); + void CryptonightR_instruction_mov33(); + void CryptonightR_instruction_mov34(); + void CryptonightR_instruction_mov35(); + void CryptonightR_instruction_mov36(); + void CryptonightR_instruction_mov37(); + void CryptonightR_instruction_mov38(); + void CryptonightR_instruction_mov39(); + void CryptonightR_instruction_mov40(); + void CryptonightR_instruction_mov41(); + void CryptonightR_instruction_mov42(); + void CryptonightR_instruction_mov43(); + void CryptonightR_instruction_mov44(); + void CryptonightR_instruction_mov45(); + void CryptonightR_instruction_mov46(); + void CryptonightR_instruction_mov47(); + void CryptonightR_instruction_mov48(); + void CryptonightR_instruction_mov49(); + void CryptonightR_instruction_mov50(); + void CryptonightR_instruction_mov51(); + void CryptonightR_instruction_mov52(); + void CryptonightR_instruction_mov53(); + void CryptonightR_instruction_mov54(); + void CryptonightR_instruction_mov55(); + void CryptonightR_instruction_mov56(); + void CryptonightR_instruction_mov57(); + void CryptonightR_instruction_mov58(); + void CryptonightR_instruction_mov59(); + void CryptonightR_instruction_mov60(); + void CryptonightR_instruction_mov61(); + void CryptonightR_instruction_mov62(); + void CryptonightR_instruction_mov63(); + void CryptonightR_instruction_mov64(); + void CryptonightR_instruction_mov65(); + void CryptonightR_instruction_mov66(); + void CryptonightR_instruction_mov67(); + void CryptonightR_instruction_mov68(); + void CryptonightR_instruction_mov69(); + void CryptonightR_instruction_mov70(); + void CryptonightR_instruction_mov71(); + void CryptonightR_instruction_mov72(); + void CryptonightR_instruction_mov73(); + void CryptonightR_instruction_mov74(); + void CryptonightR_instruction_mov75(); + void CryptonightR_instruction_mov76(); + void CryptonightR_instruction_mov77(); + void CryptonightR_instruction_mov78(); + void CryptonightR_instruction_mov79(); + void CryptonightR_instruction_mov80(); + void CryptonightR_instruction_mov81(); + void CryptonightR_instruction_mov82(); + void CryptonightR_instruction_mov83(); + void CryptonightR_instruction_mov84(); + void CryptonightR_instruction_mov85(); + void CryptonightR_instruction_mov86(); + void CryptonightR_instruction_mov87(); + void CryptonightR_instruction_mov88(); + void CryptonightR_instruction_mov89(); + void CryptonightR_instruction_mov90(); + void CryptonightR_instruction_mov91(); + void CryptonightR_instruction_mov92(); + void CryptonightR_instruction_mov93(); + void CryptonightR_instruction_mov94(); + void CryptonightR_instruction_mov95(); + void CryptonightR_instruction_mov96(); + void CryptonightR_instruction_mov97(); + void CryptonightR_instruction_mov98(); + void CryptonightR_instruction_mov99(); + void CryptonightR_instruction_mov100(); + void CryptonightR_instruction_mov101(); + void CryptonightR_instruction_mov102(); + void CryptonightR_instruction_mov103(); + void CryptonightR_instruction_mov104(); + void CryptonightR_instruction_mov105(); + void CryptonightR_instruction_mov106(); + void CryptonightR_instruction_mov107(); + void CryptonightR_instruction_mov108(); + void CryptonightR_instruction_mov109(); + void CryptonightR_instruction_mov110(); + void CryptonightR_instruction_mov111(); + void CryptonightR_instruction_mov112(); + void CryptonightR_instruction_mov113(); + void CryptonightR_instruction_mov114(); + void CryptonightR_instruction_mov115(); + void CryptonightR_instruction_mov116(); + void CryptonightR_instruction_mov117(); + void CryptonightR_instruction_mov118(); + void CryptonightR_instruction_mov119(); + void CryptonightR_instruction_mov120(); + void CryptonightR_instruction_mov121(); + void CryptonightR_instruction_mov122(); + void CryptonightR_instruction_mov123(); + void CryptonightR_instruction_mov124(); + void CryptonightR_instruction_mov125(); + void CryptonightR_instruction_mov126(); + void CryptonightR_instruction_mov127(); + void CryptonightR_instruction_mov128(); + void CryptonightR_instruction_mov129(); + void CryptonightR_instruction_mov130(); + void CryptonightR_instruction_mov131(); + void CryptonightR_instruction_mov132(); + void CryptonightR_instruction_mov133(); + void CryptonightR_instruction_mov134(); + void CryptonightR_instruction_mov135(); + void CryptonightR_instruction_mov136(); + void CryptonightR_instruction_mov137(); + void CryptonightR_instruction_mov138(); + void CryptonightR_instruction_mov139(); + void CryptonightR_instruction_mov140(); + void CryptonightR_instruction_mov141(); + void CryptonightR_instruction_mov142(); + void CryptonightR_instruction_mov143(); + void CryptonightR_instruction_mov144(); + void CryptonightR_instruction_mov145(); + void CryptonightR_instruction_mov146(); + void CryptonightR_instruction_mov147(); + void CryptonightR_instruction_mov148(); + void CryptonightR_instruction_mov149(); + void CryptonightR_instruction_mov150(); + void CryptonightR_instruction_mov151(); + void CryptonightR_instruction_mov152(); + void CryptonightR_instruction_mov153(); + void CryptonightR_instruction_mov154(); + void CryptonightR_instruction_mov155(); + void CryptonightR_instruction_mov156(); + void CryptonightR_instruction_mov157(); + void CryptonightR_instruction_mov158(); + void CryptonightR_instruction_mov159(); + void CryptonightR_instruction_mov160(); + void CryptonightR_instruction_mov161(); + void CryptonightR_instruction_mov162(); + void CryptonightR_instruction_mov163(); + void CryptonightR_instruction_mov164(); + void CryptonightR_instruction_mov165(); + void CryptonightR_instruction_mov166(); + void CryptonightR_instruction_mov167(); + void CryptonightR_instruction_mov168(); + void CryptonightR_instruction_mov169(); + void CryptonightR_instruction_mov170(); + void CryptonightR_instruction_mov171(); + void CryptonightR_instruction_mov172(); + void CryptonightR_instruction_mov173(); + void CryptonightR_instruction_mov174(); + void CryptonightR_instruction_mov175(); + void CryptonightR_instruction_mov176(); + void CryptonightR_instruction_mov177(); + void CryptonightR_instruction_mov178(); + void CryptonightR_instruction_mov179(); + void CryptonightR_instruction_mov180(); + void CryptonightR_instruction_mov181(); + void CryptonightR_instruction_mov182(); + void CryptonightR_instruction_mov183(); + void CryptonightR_instruction_mov184(); + void CryptonightR_instruction_mov185(); + void CryptonightR_instruction_mov186(); + void CryptonightR_instruction_mov187(); + void CryptonightR_instruction_mov188(); + void CryptonightR_instruction_mov189(); + void CryptonightR_instruction_mov190(); + void CryptonightR_instruction_mov191(); + void CryptonightR_instruction_mov192(); + void CryptonightR_instruction_mov193(); + void CryptonightR_instruction_mov194(); + void CryptonightR_instruction_mov195(); + void CryptonightR_instruction_mov196(); + void CryptonightR_instruction_mov197(); + void CryptonightR_instruction_mov198(); + void CryptonightR_instruction_mov199(); + void CryptonightR_instruction_mov200(); + void CryptonightR_instruction_mov201(); + void CryptonightR_instruction_mov202(); + void CryptonightR_instruction_mov203(); + void CryptonightR_instruction_mov204(); + void CryptonightR_instruction_mov205(); + void CryptonightR_instruction_mov206(); + void CryptonightR_instruction_mov207(); + void CryptonightR_instruction_mov208(); + void CryptonightR_instruction_mov209(); + void CryptonightR_instruction_mov210(); + void CryptonightR_instruction_mov211(); + void CryptonightR_instruction_mov212(); + void CryptonightR_instruction_mov213(); + void CryptonightR_instruction_mov214(); + void CryptonightR_instruction_mov215(); + void CryptonightR_instruction_mov216(); + void CryptonightR_instruction_mov217(); + void CryptonightR_instruction_mov218(); + void CryptonightR_instruction_mov219(); + void CryptonightR_instruction_mov220(); + void CryptonightR_instruction_mov221(); + void CryptonightR_instruction_mov222(); + void CryptonightR_instruction_mov223(); + void CryptonightR_instruction_mov224(); + void CryptonightR_instruction_mov225(); + void CryptonightR_instruction_mov226(); + void CryptonightR_instruction_mov227(); + void CryptonightR_instruction_mov228(); + void CryptonightR_instruction_mov229(); + void CryptonightR_instruction_mov230(); + void CryptonightR_instruction_mov231(); + void CryptonightR_instruction_mov232(); + void CryptonightR_instruction_mov233(); + void CryptonightR_instruction_mov234(); + void CryptonightR_instruction_mov235(); + void CryptonightR_instruction_mov236(); + void CryptonightR_instruction_mov237(); + void CryptonightR_instruction_mov238(); + void CryptonightR_instruction_mov239(); + void CryptonightR_instruction_mov240(); + void CryptonightR_instruction_mov241(); + void CryptonightR_instruction_mov242(); + void CryptonightR_instruction_mov243(); + void CryptonightR_instruction_mov244(); + void CryptonightR_instruction_mov245(); + void CryptonightR_instruction_mov246(); + void CryptonightR_instruction_mov247(); + void CryptonightR_instruction_mov248(); + void CryptonightR_instruction_mov249(); + void CryptonightR_instruction_mov250(); + void CryptonightR_instruction_mov251(); + void CryptonightR_instruction_mov252(); + void CryptonightR_instruction_mov253(); + void CryptonightR_instruction_mov254(); + void CryptonightR_instruction_mov255(); + void CryptonightR_instruction_mov256(); +} + +const void_func instructions[257] = { + CryptonightR_instruction0, + CryptonightR_instruction1, + CryptonightR_instruction2, + CryptonightR_instruction3, + CryptonightR_instruction4, + CryptonightR_instruction5, + CryptonightR_instruction6, + CryptonightR_instruction7, + CryptonightR_instruction8, + CryptonightR_instruction9, + CryptonightR_instruction10, + CryptonightR_instruction11, + CryptonightR_instruction12, + CryptonightR_instruction13, + CryptonightR_instruction14, + CryptonightR_instruction15, + CryptonightR_instruction16, + CryptonightR_instruction17, + CryptonightR_instruction18, + CryptonightR_instruction19, + CryptonightR_instruction20, + CryptonightR_instruction21, + CryptonightR_instruction22, + CryptonightR_instruction23, + CryptonightR_instruction24, + CryptonightR_instruction25, + CryptonightR_instruction26, + CryptonightR_instruction27, + CryptonightR_instruction28, + CryptonightR_instruction29, + CryptonightR_instruction30, + CryptonightR_instruction31, + CryptonightR_instruction32, + CryptonightR_instruction33, + CryptonightR_instruction34, + CryptonightR_instruction35, + CryptonightR_instruction36, + CryptonightR_instruction37, + CryptonightR_instruction38, + CryptonightR_instruction39, + CryptonightR_instruction40, + CryptonightR_instruction41, + CryptonightR_instruction42, + CryptonightR_instruction43, + CryptonightR_instruction44, + CryptonightR_instruction45, + CryptonightR_instruction46, + CryptonightR_instruction47, + CryptonightR_instruction48, + CryptonightR_instruction49, + CryptonightR_instruction50, + CryptonightR_instruction51, + CryptonightR_instruction52, + CryptonightR_instruction53, + CryptonightR_instruction54, + CryptonightR_instruction55, + CryptonightR_instruction56, + CryptonightR_instruction57, + CryptonightR_instruction58, + CryptonightR_instruction59, + CryptonightR_instruction60, + CryptonightR_instruction61, + CryptonightR_instruction62, + CryptonightR_instruction63, + CryptonightR_instruction64, + CryptonightR_instruction65, + CryptonightR_instruction66, + CryptonightR_instruction67, + CryptonightR_instruction68, + CryptonightR_instruction69, + CryptonightR_instruction70, + CryptonightR_instruction71, + CryptonightR_instruction72, + CryptonightR_instruction73, + CryptonightR_instruction74, + CryptonightR_instruction75, + CryptonightR_instruction76, + CryptonightR_instruction77, + CryptonightR_instruction78, + CryptonightR_instruction79, + CryptonightR_instruction80, + CryptonightR_instruction81, + CryptonightR_instruction82, + CryptonightR_instruction83, + CryptonightR_instruction84, + CryptonightR_instruction85, + CryptonightR_instruction86, + CryptonightR_instruction87, + CryptonightR_instruction88, + CryptonightR_instruction89, + CryptonightR_instruction90, + CryptonightR_instruction91, + CryptonightR_instruction92, + CryptonightR_instruction93, + CryptonightR_instruction94, + CryptonightR_instruction95, + CryptonightR_instruction96, + CryptonightR_instruction97, + CryptonightR_instruction98, + CryptonightR_instruction99, + CryptonightR_instruction100, + CryptonightR_instruction101, + CryptonightR_instruction102, + CryptonightR_instruction103, + CryptonightR_instruction104, + CryptonightR_instruction105, + CryptonightR_instruction106, + CryptonightR_instruction107, + CryptonightR_instruction108, + CryptonightR_instruction109, + CryptonightR_instruction110, + CryptonightR_instruction111, + CryptonightR_instruction112, + CryptonightR_instruction113, + CryptonightR_instruction114, + CryptonightR_instruction115, + CryptonightR_instruction116, + CryptonightR_instruction117, + CryptonightR_instruction118, + CryptonightR_instruction119, + CryptonightR_instruction120, + CryptonightR_instruction121, + CryptonightR_instruction122, + CryptonightR_instruction123, + CryptonightR_instruction124, + CryptonightR_instruction125, + CryptonightR_instruction126, + CryptonightR_instruction127, + CryptonightR_instruction128, + CryptonightR_instruction129, + CryptonightR_instruction130, + CryptonightR_instruction131, + CryptonightR_instruction132, + CryptonightR_instruction133, + CryptonightR_instruction134, + CryptonightR_instruction135, + CryptonightR_instruction136, + CryptonightR_instruction137, + CryptonightR_instruction138, + CryptonightR_instruction139, + CryptonightR_instruction140, + CryptonightR_instruction141, + CryptonightR_instruction142, + CryptonightR_instruction143, + CryptonightR_instruction144, + CryptonightR_instruction145, + CryptonightR_instruction146, + CryptonightR_instruction147, + CryptonightR_instruction148, + CryptonightR_instruction149, + CryptonightR_instruction150, + CryptonightR_instruction151, + CryptonightR_instruction152, + CryptonightR_instruction153, + CryptonightR_instruction154, + CryptonightR_instruction155, + CryptonightR_instruction156, + CryptonightR_instruction157, + CryptonightR_instruction158, + CryptonightR_instruction159, + CryptonightR_instruction160, + CryptonightR_instruction161, + CryptonightR_instruction162, + CryptonightR_instruction163, + CryptonightR_instruction164, + CryptonightR_instruction165, + CryptonightR_instruction166, + CryptonightR_instruction167, + CryptonightR_instruction168, + CryptonightR_instruction169, + CryptonightR_instruction170, + CryptonightR_instruction171, + CryptonightR_instruction172, + CryptonightR_instruction173, + CryptonightR_instruction174, + CryptonightR_instruction175, + CryptonightR_instruction176, + CryptonightR_instruction177, + CryptonightR_instruction178, + CryptonightR_instruction179, + CryptonightR_instruction180, + CryptonightR_instruction181, + CryptonightR_instruction182, + CryptonightR_instruction183, + CryptonightR_instruction184, + CryptonightR_instruction185, + CryptonightR_instruction186, + CryptonightR_instruction187, + CryptonightR_instruction188, + CryptonightR_instruction189, + CryptonightR_instruction190, + CryptonightR_instruction191, + CryptonightR_instruction192, + CryptonightR_instruction193, + CryptonightR_instruction194, + CryptonightR_instruction195, + CryptonightR_instruction196, + CryptonightR_instruction197, + CryptonightR_instruction198, + CryptonightR_instruction199, + CryptonightR_instruction200, + CryptonightR_instruction201, + CryptonightR_instruction202, + CryptonightR_instruction203, + CryptonightR_instruction204, + CryptonightR_instruction205, + CryptonightR_instruction206, + CryptonightR_instruction207, + CryptonightR_instruction208, + CryptonightR_instruction209, + CryptonightR_instruction210, + CryptonightR_instruction211, + CryptonightR_instruction212, + CryptonightR_instruction213, + CryptonightR_instruction214, + CryptonightR_instruction215, + CryptonightR_instruction216, + CryptonightR_instruction217, + CryptonightR_instruction218, + CryptonightR_instruction219, + CryptonightR_instruction220, + CryptonightR_instruction221, + CryptonightR_instruction222, + CryptonightR_instruction223, + CryptonightR_instruction224, + CryptonightR_instruction225, + CryptonightR_instruction226, + CryptonightR_instruction227, + CryptonightR_instruction228, + CryptonightR_instruction229, + CryptonightR_instruction230, + CryptonightR_instruction231, + CryptonightR_instruction232, + CryptonightR_instruction233, + CryptonightR_instruction234, + CryptonightR_instruction235, + CryptonightR_instruction236, + CryptonightR_instruction237, + CryptonightR_instruction238, + CryptonightR_instruction239, + CryptonightR_instruction240, + CryptonightR_instruction241, + CryptonightR_instruction242, + CryptonightR_instruction243, + CryptonightR_instruction244, + CryptonightR_instruction245, + CryptonightR_instruction246, + CryptonightR_instruction247, + CryptonightR_instruction248, + CryptonightR_instruction249, + CryptonightR_instruction250, + CryptonightR_instruction251, + CryptonightR_instruction252, + CryptonightR_instruction253, + CryptonightR_instruction254, + CryptonightR_instruction255, + CryptonightR_instruction256, +}; + +const void_func instructions_mov[257] = { + CryptonightR_instruction_mov0, + CryptonightR_instruction_mov1, + CryptonightR_instruction_mov2, + CryptonightR_instruction_mov3, + CryptonightR_instruction_mov4, + CryptonightR_instruction_mov5, + CryptonightR_instruction_mov6, + CryptonightR_instruction_mov7, + CryptonightR_instruction_mov8, + CryptonightR_instruction_mov9, + CryptonightR_instruction_mov10, + CryptonightR_instruction_mov11, + CryptonightR_instruction_mov12, + CryptonightR_instruction_mov13, + CryptonightR_instruction_mov14, + CryptonightR_instruction_mov15, + CryptonightR_instruction_mov16, + CryptonightR_instruction_mov17, + CryptonightR_instruction_mov18, + CryptonightR_instruction_mov19, + CryptonightR_instruction_mov20, + CryptonightR_instruction_mov21, + CryptonightR_instruction_mov22, + CryptonightR_instruction_mov23, + CryptonightR_instruction_mov24, + CryptonightR_instruction_mov25, + CryptonightR_instruction_mov26, + CryptonightR_instruction_mov27, + CryptonightR_instruction_mov28, + CryptonightR_instruction_mov29, + CryptonightR_instruction_mov30, + CryptonightR_instruction_mov31, + CryptonightR_instruction_mov32, + CryptonightR_instruction_mov33, + CryptonightR_instruction_mov34, + CryptonightR_instruction_mov35, + CryptonightR_instruction_mov36, + CryptonightR_instruction_mov37, + CryptonightR_instruction_mov38, + CryptonightR_instruction_mov39, + CryptonightR_instruction_mov40, + CryptonightR_instruction_mov41, + CryptonightR_instruction_mov42, + CryptonightR_instruction_mov43, + CryptonightR_instruction_mov44, + CryptonightR_instruction_mov45, + CryptonightR_instruction_mov46, + CryptonightR_instruction_mov47, + CryptonightR_instruction_mov48, + CryptonightR_instruction_mov49, + CryptonightR_instruction_mov50, + CryptonightR_instruction_mov51, + CryptonightR_instruction_mov52, + CryptonightR_instruction_mov53, + CryptonightR_instruction_mov54, + CryptonightR_instruction_mov55, + CryptonightR_instruction_mov56, + CryptonightR_instruction_mov57, + CryptonightR_instruction_mov58, + CryptonightR_instruction_mov59, + CryptonightR_instruction_mov60, + CryptonightR_instruction_mov61, + CryptonightR_instruction_mov62, + CryptonightR_instruction_mov63, + CryptonightR_instruction_mov64, + CryptonightR_instruction_mov65, + CryptonightR_instruction_mov66, + CryptonightR_instruction_mov67, + CryptonightR_instruction_mov68, + CryptonightR_instruction_mov69, + CryptonightR_instruction_mov70, + CryptonightR_instruction_mov71, + CryptonightR_instruction_mov72, + CryptonightR_instruction_mov73, + CryptonightR_instruction_mov74, + CryptonightR_instruction_mov75, + CryptonightR_instruction_mov76, + CryptonightR_instruction_mov77, + CryptonightR_instruction_mov78, + CryptonightR_instruction_mov79, + CryptonightR_instruction_mov80, + CryptonightR_instruction_mov81, + CryptonightR_instruction_mov82, + CryptonightR_instruction_mov83, + CryptonightR_instruction_mov84, + CryptonightR_instruction_mov85, + CryptonightR_instruction_mov86, + CryptonightR_instruction_mov87, + CryptonightR_instruction_mov88, + CryptonightR_instruction_mov89, + CryptonightR_instruction_mov90, + CryptonightR_instruction_mov91, + CryptonightR_instruction_mov92, + CryptonightR_instruction_mov93, + CryptonightR_instruction_mov94, + CryptonightR_instruction_mov95, + CryptonightR_instruction_mov96, + CryptonightR_instruction_mov97, + CryptonightR_instruction_mov98, + CryptonightR_instruction_mov99, + CryptonightR_instruction_mov100, + CryptonightR_instruction_mov101, + CryptonightR_instruction_mov102, + CryptonightR_instruction_mov103, + CryptonightR_instruction_mov104, + CryptonightR_instruction_mov105, + CryptonightR_instruction_mov106, + CryptonightR_instruction_mov107, + CryptonightR_instruction_mov108, + CryptonightR_instruction_mov109, + CryptonightR_instruction_mov110, + CryptonightR_instruction_mov111, + CryptonightR_instruction_mov112, + CryptonightR_instruction_mov113, + CryptonightR_instruction_mov114, + CryptonightR_instruction_mov115, + CryptonightR_instruction_mov116, + CryptonightR_instruction_mov117, + CryptonightR_instruction_mov118, + CryptonightR_instruction_mov119, + CryptonightR_instruction_mov120, + CryptonightR_instruction_mov121, + CryptonightR_instruction_mov122, + CryptonightR_instruction_mov123, + CryptonightR_instruction_mov124, + CryptonightR_instruction_mov125, + CryptonightR_instruction_mov126, + CryptonightR_instruction_mov127, + CryptonightR_instruction_mov128, + CryptonightR_instruction_mov129, + CryptonightR_instruction_mov130, + CryptonightR_instruction_mov131, + CryptonightR_instruction_mov132, + CryptonightR_instruction_mov133, + CryptonightR_instruction_mov134, + CryptonightR_instruction_mov135, + CryptonightR_instruction_mov136, + CryptonightR_instruction_mov137, + CryptonightR_instruction_mov138, + CryptonightR_instruction_mov139, + CryptonightR_instruction_mov140, + CryptonightR_instruction_mov141, + CryptonightR_instruction_mov142, + CryptonightR_instruction_mov143, + CryptonightR_instruction_mov144, + CryptonightR_instruction_mov145, + CryptonightR_instruction_mov146, + CryptonightR_instruction_mov147, + CryptonightR_instruction_mov148, + CryptonightR_instruction_mov149, + CryptonightR_instruction_mov150, + CryptonightR_instruction_mov151, + CryptonightR_instruction_mov152, + CryptonightR_instruction_mov153, + CryptonightR_instruction_mov154, + CryptonightR_instruction_mov155, + CryptonightR_instruction_mov156, + CryptonightR_instruction_mov157, + CryptonightR_instruction_mov158, + CryptonightR_instruction_mov159, + CryptonightR_instruction_mov160, + CryptonightR_instruction_mov161, + CryptonightR_instruction_mov162, + CryptonightR_instruction_mov163, + CryptonightR_instruction_mov164, + CryptonightR_instruction_mov165, + CryptonightR_instruction_mov166, + CryptonightR_instruction_mov167, + CryptonightR_instruction_mov168, + CryptonightR_instruction_mov169, + CryptonightR_instruction_mov170, + CryptonightR_instruction_mov171, + CryptonightR_instruction_mov172, + CryptonightR_instruction_mov173, + CryptonightR_instruction_mov174, + CryptonightR_instruction_mov175, + CryptonightR_instruction_mov176, + CryptonightR_instruction_mov177, + CryptonightR_instruction_mov178, + CryptonightR_instruction_mov179, + CryptonightR_instruction_mov180, + CryptonightR_instruction_mov181, + CryptonightR_instruction_mov182, + CryptonightR_instruction_mov183, + CryptonightR_instruction_mov184, + CryptonightR_instruction_mov185, + CryptonightR_instruction_mov186, + CryptonightR_instruction_mov187, + CryptonightR_instruction_mov188, + CryptonightR_instruction_mov189, + CryptonightR_instruction_mov190, + CryptonightR_instruction_mov191, + CryptonightR_instruction_mov192, + CryptonightR_instruction_mov193, + CryptonightR_instruction_mov194, + CryptonightR_instruction_mov195, + CryptonightR_instruction_mov196, + CryptonightR_instruction_mov197, + CryptonightR_instruction_mov198, + CryptonightR_instruction_mov199, + CryptonightR_instruction_mov200, + CryptonightR_instruction_mov201, + CryptonightR_instruction_mov202, + CryptonightR_instruction_mov203, + CryptonightR_instruction_mov204, + CryptonightR_instruction_mov205, + CryptonightR_instruction_mov206, + CryptonightR_instruction_mov207, + CryptonightR_instruction_mov208, + CryptonightR_instruction_mov209, + CryptonightR_instruction_mov210, + CryptonightR_instruction_mov211, + CryptonightR_instruction_mov212, + CryptonightR_instruction_mov213, + CryptonightR_instruction_mov214, + CryptonightR_instruction_mov215, + CryptonightR_instruction_mov216, + CryptonightR_instruction_mov217, + CryptonightR_instruction_mov218, + CryptonightR_instruction_mov219, + CryptonightR_instruction_mov220, + CryptonightR_instruction_mov221, + CryptonightR_instruction_mov222, + CryptonightR_instruction_mov223, + CryptonightR_instruction_mov224, + CryptonightR_instruction_mov225, + CryptonightR_instruction_mov226, + CryptonightR_instruction_mov227, + CryptonightR_instruction_mov228, + CryptonightR_instruction_mov229, + CryptonightR_instruction_mov230, + CryptonightR_instruction_mov231, + CryptonightR_instruction_mov232, + CryptonightR_instruction_mov233, + CryptonightR_instruction_mov234, + CryptonightR_instruction_mov235, + CryptonightR_instruction_mov236, + CryptonightR_instruction_mov237, + CryptonightR_instruction_mov238, + CryptonightR_instruction_mov239, + CryptonightR_instruction_mov240, + CryptonightR_instruction_mov241, + CryptonightR_instruction_mov242, + CryptonightR_instruction_mov243, + CryptonightR_instruction_mov244, + CryptonightR_instruction_mov245, + CryptonightR_instruction_mov246, + CryptonightR_instruction_mov247, + CryptonightR_instruction_mov248, + CryptonightR_instruction_mov249, + CryptonightR_instruction_mov250, + CryptonightR_instruction_mov251, + CryptonightR_instruction_mov252, + CryptonightR_instruction_mov253, + CryptonightR_instruction_mov254, + CryptonightR_instruction_mov255, + CryptonightR_instruction_mov256, +}; diff --git a/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.inc b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.inc new file mode 100644 index 000000000..b54486a57 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.inc @@ -0,0 +1,529 @@ +PUBLIC FN_PREFIX(CryptonightR_template_part1) +PUBLIC FN_PREFIX(CryptonightR_template_mainloop) +PUBLIC FN_PREFIX(CryptonightR_template_part2) +PUBLIC FN_PREFIX(CryptonightR_template_part3) +PUBLIC FN_PREFIX(CryptonightR_template_end) +PUBLIC FN_PREFIX(CryptonightR_template_double_part1) +PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop) +PUBLIC FN_PREFIX(CryptonightR_template_double_part2) +PUBLIC FN_PREFIX(CryptonightR_template_double_part3) +PUBLIC FN_PREFIX(CryptonightR_template_double_part4) +PUBLIC FN_PREFIX(CryptonightR_template_double_end) + +ALIGN(64) +FN_PREFIX(CryptonightR_template_part1): + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push rdi + sub rsp, 64 + mov r12, rcx + mov r8, QWORD PTR [r12+32] + mov rdx, r12 + xor r8, QWORD PTR [r12] + mov r15, QWORD PTR [r12+40] + mov r9, r8 + xor r15, QWORD PTR [r12+8] + mov r11, QWORD PTR [r12+224] + mov r12, QWORD PTR [r12+56] + xor r12, QWORD PTR [rdx+24] + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + movaps XMMWORD PTR [rsp+48], xmm6 + movq xmm0, r12 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + movaps XMMWORD PTR [rsp], xmm9 + mov r12, QWORD PTR [rdx+88] + xor r12, QWORD PTR [rdx+72] + movq xmm6, rax + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm6, xmm0 + and r9d, 2097136 + movq xmm0, r12 + movq xmm7, rax + punpcklqdq xmm7, xmm0 + mov r10d, r9d + movq xmm9, rsp + mov rsp, r8 + mov r8d, 524288 + + mov ebx, [rdx+96] + mov esi, [rdx+100] + mov edi, [rdx+104] + mov ebp, [rdx+108] + + ALIGN(64) +FN_PREFIX(CryptonightR_template_mainloop): + movdqa xmm5, XMMWORD PTR [r9+r11] + movq xmm0, r15 + movq xmm4, rsp + punpcklqdq xmm4, xmm0 + lea rdx, QWORD PTR [r9+r11] + + aesenc xmm5, xmm4 + + mov r12d, r9d + mov eax, r9d + xor r9d, 48 + xor r12d, 16 + xor eax, 32 + movdqu xmm0, XMMWORD PTR [r9+r11] + movaps xmm3, xmm0 + movdqu xmm2, XMMWORD PTR [r12+r11] + movdqu xmm1, XMMWORD PTR [rax+r11] + pxor xmm0, xmm2 + pxor xmm5, xmm1 + pxor xmm5, xmm0 + paddq xmm3, xmm7 + paddq xmm2, xmm6 + paddq xmm1, xmm4 + movdqu XMMWORD PTR [r12+r11], xmm3 + movdqu XMMWORD PTR [rax+r11], xmm2 + movdqu XMMWORD PTR [r9+r11], xmm1 + + movq r12, xmm5 + movd r10d, xmm5 + and r10d, 2097136 + + movdqa xmm0, xmm5 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [rdx], xmm0 + + lea r13d, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or r13, rdx + + xor r13, QWORD PTR [r10+r11] + mov r14, QWORD PTR [r10+r11+8] + + movd eax, xmm6 + movd edx, xmm7 + pextrd r9d, xmm7, 2 + +FN_PREFIX(CryptonightR_template_part2): + mov eax, edi + mov edx, ebp + shl rdx, 32 + or rax, rdx + xor rsp, rax + + mov eax, ebx + mov edx, esi + shl rdx, 32 + or rax, rdx + xor r15, rax + + mov rax, r13 + mul r12 + + mov r9d, r10d + mov r12d, r10d + xor r9d, 16 + xor r12d, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [r12+r11] + movaps xmm3, xmm1 + movdqa xmm2, XMMWORD PTR [r9+r11] + movdqa xmm0, XMMWORD PTR [r10+r11] + pxor xmm1, xmm2 + pxor xmm5, xmm0 + pxor xmm5, xmm1 + paddq xmm3, xmm4 + paddq xmm2, xmm6 + paddq xmm0, xmm7 + movdqu XMMWORD PTR [r9+r11], xmm0 + movdqu XMMWORD PTR [r12+r11], xmm2 + movdqu XMMWORD PTR [r10+r11], xmm3 + + movdqa xmm7, xmm6 + add r15, rax + add rsp, rdx + xor r10, 48 + mov QWORD PTR [r10+r11], rsp + xor rsp, r13 + mov r9d, esp + mov QWORD PTR [r10+r11+8], r15 + and r9d, 2097136 + xor r15, r14 + movdqa xmm6, xmm5 + dec r8d + jnz FN_PREFIX(CryptonightR_template_mainloop) + +FN_PREFIX(CryptonightR_template_part3): + movq rsp, xmm9 + + mov rbx, QWORD PTR [rsp+136] + mov rbp, QWORD PTR [rsp+144] + mov rsi, QWORD PTR [rsp+152] + movaps xmm6, XMMWORD PTR [rsp+48] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+16] + movaps xmm9, XMMWORD PTR [rsp] + add rsp, 64 + pop rdi + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + ret 0 +FN_PREFIX(CryptonightR_template_end): + +ALIGN(64) +FN_PREFIX(CryptonightR_template_double_part1): + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 320 + mov r14, QWORD PTR [rcx+32] + mov r8, rcx + xor r14, QWORD PTR [rcx] + mov r12, QWORD PTR [rcx+40] + mov ebx, r14d + mov rsi, QWORD PTR [rcx+224] + and ebx, 2097136 + xor r12, QWORD PTR [rcx+8] + mov rcx, QWORD PTR [rcx+56] + xor rcx, QWORD PTR [r8+24] + mov rax, QWORD PTR [r8+48] + xor rax, QWORD PTR [r8+16] + mov r15, QWORD PTR [rdx+32] + xor r15, QWORD PTR [rdx] + movq xmm0, rcx + mov rcx, QWORD PTR [r8+88] + xor rcx, QWORD PTR [r8+72] + mov r13, QWORD PTR [rdx+40] + mov rdi, QWORD PTR [rdx+224] + xor r13, QWORD PTR [rdx+8] + movaps XMMWORD PTR [rsp+160], xmm6 + movaps XMMWORD PTR [rsp+176], xmm7 + movaps XMMWORD PTR [rsp+192], xmm8 + movaps XMMWORD PTR [rsp+208], xmm9 + movaps XMMWORD PTR [rsp+224], xmm10 + movaps XMMWORD PTR [rsp+240], xmm11 + movaps XMMWORD PTR [rsp+256], xmm12 + movaps XMMWORD PTR [rsp+272], xmm13 + movaps XMMWORD PTR [rsp+288], xmm14 + movaps XMMWORD PTR [rsp+304], xmm15 + movq xmm7, rax + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + + movaps xmm1, XMMWORD PTR [rdx+96] + movaps xmm2, XMMWORD PTR [r8+96] + movaps XMMWORD PTR [rsp], xmm1 + movaps XMMWORD PTR [rsp+16], xmm2 + + mov r8d, r15d + punpcklqdq xmm7, xmm0 + movq xmm0, rcx + mov rcx, QWORD PTR [rdx+56] + xor rcx, QWORD PTR [rdx+24] + movq xmm9, rax + mov QWORD PTR [rsp+128], rsi + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + punpcklqdq xmm9, xmm0 + movq xmm0, rcx + mov rcx, QWORD PTR [rdx+88] + xor rcx, QWORD PTR [rdx+72] + movq xmm8, rax + mov QWORD PTR [rsp+136], rdi + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm8, xmm0 + and r8d, 2097136 + movq xmm0, rcx + mov r11d, 524288 + movq xmm10, rax + punpcklqdq xmm10, xmm0 + + movq xmm14, QWORD PTR [rsp+128] + movq xmm15, QWORD PTR [rsp+136] + + ALIGN(64) +FN_PREFIX(CryptonightR_template_double_mainloop): + movdqu xmm6, XMMWORD PTR [rbx+rsi] + movq xmm0, r12 + mov ecx, ebx + movq xmm3, r14 + punpcklqdq xmm3, xmm0 + xor ebx, 16 + aesenc xmm6, xmm3 + movq xmm4, r15 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + pxor xmm6, xmm0 + xor ebx, 48 + paddq xmm0, xmm7 + movdqu xmm1, XMMWORD PTR [rbx+rsi] + pxor xmm6, xmm1 + movdqu XMMWORD PTR [rbx+rsi], xmm0 + paddq xmm1, xmm3 + xor ebx, 16 + mov eax, ebx + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + pxor xmm6, xmm0 + movq rdx, xmm6 + movdqu XMMWORD PTR [rbx+rsi], xmm1 + paddq xmm0, xmm9 + movdqu XMMWORD PTR [rax+rsi], xmm0 + movdqa xmm0, xmm6 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [rcx+rsi], xmm0 + mov esi, edx + movdqu xmm5, XMMWORD PTR [r8+rdi] + and esi, 2097136 + mov ecx, r8d + movq xmm0, r13 + punpcklqdq xmm4, xmm0 + xor r8d, 16 + aesenc xmm5, xmm4 + movdqu xmm0, XMMWORD PTR [r8+rdi] + pxor xmm5, xmm0 + xor r8d, 48 + paddq xmm0, xmm8 + movdqu xmm1, XMMWORD PTR [r8+rdi] + pxor xmm5, xmm1 + movdqu XMMWORD PTR [r8+rdi], xmm0 + paddq xmm1, xmm4 + xor r8d, 16 + mov eax, r8d + xor rax, 32 + movdqu xmm0, XMMWORD PTR [r8+rdi] + pxor xmm5, xmm0 + movdqu XMMWORD PTR [r8+rdi], xmm1 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rdi], xmm0 + movdqa xmm0, xmm5 + pxor xmm0, xmm8 + movdqu XMMWORD PTR [rcx+rdi], xmm0 + movq rdi, xmm5 + movq rcx, xmm14 + mov ebp, edi + mov r8, QWORD PTR [rcx+rsi] + mov r10, QWORD PTR [rcx+rsi+8] + lea r9, QWORD PTR [rcx+rsi] + xor esi, 16 + + movq xmm0, rsp + movq xmm1, rsi + movq xmm2, rdi + movq xmm11, rbp + movq xmm12, r15 + movq xmm13, rdx + mov [rsp+104], rcx + mov [rsp+112], r9 + + mov ebx, DWORD PTR [rsp+16] + mov esi, DWORD PTR [rsp+20] + mov edi, DWORD PTR [rsp+24] + mov ebp, DWORD PTR [rsp+28] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + xor r8, rax + + movd esp, xmm3 + pextrd r15d, xmm3, 2 + movd eax, xmm7 + movd edx, xmm9 + pextrd r9d, xmm9, 2 + +FN_PREFIX(CryptonightR_template_double_part2): + + mov eax, edi + mov edx, ebp + shl rdx, 32 + or rax, rdx + xor r14, rax + + mov eax, ebx + mov edx, esi + shl rdx, 32 + or rax, rdx + xor r12, rax + + movq rsp, xmm0 + mov DWORD PTR [rsp+16], ebx + mov DWORD PTR [rsp+20], esi + mov DWORD PTR [rsp+24], edi + mov DWORD PTR [rsp+28], ebp + + movq rsi, xmm1 + movq rdi, xmm2 + movq rbp, xmm11 + movq r15, xmm12 + movq rdx, xmm13 + mov rcx, [rsp+104] + mov r9, [rsp+112] + + mov rbx, r8 + mov rax, r8 + mul rdx + and ebp, 2097136 + mov r8, rax + movdqu xmm1, XMMWORD PTR [rcx+rsi] + pxor xmm6, xmm1 + xor esi, 48 + paddq xmm1, xmm7 + movdqu xmm2, XMMWORD PTR [rsi+rcx] + pxor xmm6, xmm2 + paddq xmm2, xmm3 + movdqu XMMWORD PTR [rsi+rcx], xmm1 + xor esi, 16 + mov eax, esi + mov rsi, rcx + movdqu xmm0, XMMWORD PTR [rax+rcx] + pxor xmm6, xmm0 + movdqu XMMWORD PTR [rax+rcx], xmm2 + paddq xmm0, xmm9 + add r12, r8 + xor rax, 32 + add r14, rdx + movdqa xmm9, xmm7 + movdqa xmm7, xmm6 + movdqu XMMWORD PTR [rax+rcx], xmm0 + mov QWORD PTR [r9+8], r12 + xor r12, r10 + mov QWORD PTR [r9], r14 + movq rcx, xmm15 + xor r14, rbx + mov r10d, ebp + mov ebx, r14d + xor ebp, 16 + and ebx, 2097136 + mov r8, QWORD PTR [r10+rcx] + mov r9, QWORD PTR [r10+rcx+8] + + movq xmm0, rsp + movq xmm1, rbx + movq xmm2, rsi + movq xmm11, rdi + movq xmm12, rbp + movq xmm13, r15 + mov [rsp+104], rcx + mov [rsp+112], r9 + + mov ebx, DWORD PTR [rsp] + mov esi, DWORD PTR [rsp+4] + mov edi, DWORD PTR [rsp+8] + mov ebp, DWORD PTR [rsp+12] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + + xor r8, rax + movq xmm3, r8 + + movd esp, xmm4 + pextrd r15d, xmm4, 2 + movd eax, xmm8 + movd edx, xmm10 + pextrd r9d, xmm10, 2 + +FN_PREFIX(CryptonightR_template_double_part3): + + movq r15, xmm13 + + mov eax, edi + mov edx, ebp + shl rdx, 32 + or rax, rdx + xor r15, rax + + mov eax, ebx + mov edx, esi + shl rdx, 32 + or rax, rdx + xor r13, rax + + movq rsp, xmm0 + mov DWORD PTR [rsp], ebx + mov DWORD PTR [rsp+4], esi + mov DWORD PTR [rsp+8], edi + mov DWORD PTR [rsp+12], ebp + + movq rbx, xmm1 + movq rsi, xmm2 + movq rdi, xmm11 + movq rbp, xmm12 + mov rcx, [rsp+104] + mov r9, [rsp+112] + + mov rax, r8 + mul rdi + mov rdi, rcx + mov r8, rax + movdqu xmm1, XMMWORD PTR [rbp+rcx] + pxor xmm5, xmm1 + xor ebp, 48 + paddq xmm1, xmm8 + add r13, r8 + movdqu xmm2, XMMWORD PTR [rbp+rcx] + pxor xmm5, xmm2 + add r15, rdx + movdqu XMMWORD PTR [rbp+rcx], xmm1 + paddq xmm2, xmm4 + xor ebp, 16 + mov eax, ebp + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbp+rcx] + pxor xmm5, xmm0 + movdqu XMMWORD PTR [rbp+rcx], xmm2 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rcx], xmm0 + movq rax, xmm3 + movdqa xmm10, xmm8 + mov QWORD PTR [r10+rcx], r15 + movdqa xmm8, xmm5 + xor r15, rax + mov QWORD PTR [r10+rcx+8], r13 + mov r8d, r15d + xor r13, r9 + and r8d, 2097136 + dec r11d + jnz FN_PREFIX(CryptonightR_template_double_mainloop) + +FN_PREFIX(CryptonightR_template_double_part4): + + mov rbx, QWORD PTR [rsp+400] + movaps xmm6, XMMWORD PTR [rsp+160] + movaps xmm7, XMMWORD PTR [rsp+176] + movaps xmm8, XMMWORD PTR [rsp+192] + movaps xmm9, XMMWORD PTR [rsp+208] + movaps xmm10, XMMWORD PTR [rsp+224] + movaps xmm11, XMMWORD PTR [rsp+240] + movaps xmm12, XMMWORD PTR [rsp+256] + movaps xmm13, XMMWORD PTR [rsp+272] + movaps xmm14, XMMWORD PTR [rsp+288] + movaps xmm15, XMMWORD PTR [rsp+304] + add rsp, 320 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + ret 0 +FN_PREFIX(CryptonightR_template_double_end): diff --git a/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template_win.inc b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template_win.inc new file mode 100644 index 000000000..2f2d71a25 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template_win.inc @@ -0,0 +1,529 @@ +PUBLIC CryptonightR_template_part1 +PUBLIC CryptonightR_template_mainloop +PUBLIC CryptonightR_template_part2 +PUBLIC CryptonightR_template_part3 +PUBLIC CryptonightR_template_end +PUBLIC CryptonightR_template_double_part1 +PUBLIC CryptonightR_template_double_mainloop +PUBLIC CryptonightR_template_double_part2 +PUBLIC CryptonightR_template_double_part3 +PUBLIC CryptonightR_template_double_part4 +PUBLIC CryptonightR_template_double_end + +ALIGN(64) +CryptonightR_template_part1: + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push rdi + sub rsp, 64 + mov r12, rcx + mov r8, QWORD PTR [r12+32] + mov rdx, r12 + xor r8, QWORD PTR [r12] + mov r15, QWORD PTR [r12+40] + mov r9, r8 + xor r15, QWORD PTR [r12+8] + mov r11, QWORD PTR [r12+224] + mov r12, QWORD PTR [r12+56] + xor r12, QWORD PTR [rdx+24] + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + movaps XMMWORD PTR [rsp+48], xmm6 + movd xmm0, r12 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + movaps XMMWORD PTR [rsp], xmm9 + mov r12, QWORD PTR [rdx+88] + xor r12, QWORD PTR [rdx+72] + movd xmm6, rax + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm6, xmm0 + and r9d, 2097136 + movd xmm0, r12 + movd xmm7, rax + punpcklqdq xmm7, xmm0 + mov r10d, r9d + movd xmm9, rsp + mov rsp, r8 + mov r8d, 524288 + + mov ebx, [rdx+96] + mov esi, [rdx+100] + mov edi, [rdx+104] + mov ebp, [rdx+108] + + ALIGN(64) +CryptonightR_template_mainloop: + movdqa xmm5, XMMWORD PTR [r9+r11] + movd xmm0, r15 + movd xmm4, rsp + punpcklqdq xmm4, xmm0 + lea rdx, QWORD PTR [r9+r11] + + aesenc xmm5, xmm4 + + mov r12d, r9d + mov eax, r9d + xor r9d, 48 + xor r12d, 16 + xor eax, 32 + movdqu xmm0, XMMWORD PTR [r9+r11] + movaps xmm3, xmm0 + movdqu xmm2, XMMWORD PTR [r12+r11] + movdqu xmm1, XMMWORD PTR [rax+r11] + pxor xmm0, xmm2 + pxor xmm5, xmm1 + pxor xmm5, xmm0 + paddq xmm3, xmm7 + paddq xmm2, xmm6 + paddq xmm1, xmm4 + movdqu XMMWORD PTR [r12+r11], xmm3 + movdqu XMMWORD PTR [rax+r11], xmm2 + movdqu XMMWORD PTR [r9+r11], xmm1 + + movd r12, xmm5 + movd r10d, xmm5 + and r10d, 2097136 + + movdqa xmm0, xmm5 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [rdx], xmm0 + + lea r13d, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or r13, rdx + + xor r13, QWORD PTR [r10+r11] + mov r14, QWORD PTR [r10+r11+8] + + movd eax, xmm6 + movd edx, xmm7 + pextrd r9d, xmm7, 2 + +CryptonightR_template_part2: + mov eax, edi + mov edx, ebp + shl rdx, 32 + or rax, rdx + xor rsp, rax + + mov eax, ebx + mov edx, esi + shl rdx, 32 + or rax, rdx + xor r15, rax + + mov rax, r13 + mul r12 + + mov r9d, r10d + mov r12d, r10d + xor r9d, 16 + xor r12d, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [r12+r11] + movaps xmm3, xmm1 + movdqa xmm2, XMMWORD PTR [r9+r11] + movdqa xmm0, XMMWORD PTR [r10+r11] + pxor xmm1, xmm2 + pxor xmm5, xmm0 + pxor xmm5, xmm1 + paddq xmm3, xmm4 + paddq xmm2, xmm6 + paddq xmm0, xmm7 + movdqu XMMWORD PTR [r9+r11], xmm0 + movdqu XMMWORD PTR [r12+r11], xmm2 + movdqu XMMWORD PTR [r10+r11], xmm3 + + movdqa xmm7, xmm6 + add r15, rax + add rsp, rdx + xor r10, 48 + mov QWORD PTR [r10+r11], rsp + xor rsp, r13 + mov r9d, esp + mov QWORD PTR [r10+r11+8], r15 + and r9d, 2097136 + xor r15, r14 + movdqa xmm6, xmm5 + dec r8d + jnz CryptonightR_template_mainloop + +CryptonightR_template_part3: + movd rsp, xmm9 + + mov rbx, QWORD PTR [rsp+136] + mov rbp, QWORD PTR [rsp+144] + mov rsi, QWORD PTR [rsp+152] + movaps xmm6, XMMWORD PTR [rsp+48] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+16] + movaps xmm9, XMMWORD PTR [rsp] + add rsp, 64 + pop rdi + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + ret 0 +CryptonightR_template_end: + +ALIGN(64) +CryptonightR_template_double_part1: + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 320 + mov r14, QWORD PTR [rcx+32] + mov r8, rcx + xor r14, QWORD PTR [rcx] + mov r12, QWORD PTR [rcx+40] + mov ebx, r14d + mov rsi, QWORD PTR [rcx+224] + and ebx, 2097136 + xor r12, QWORD PTR [rcx+8] + mov rcx, QWORD PTR [rcx+56] + xor rcx, QWORD PTR [r8+24] + mov rax, QWORD PTR [r8+48] + xor rax, QWORD PTR [r8+16] + mov r15, QWORD PTR [rdx+32] + xor r15, QWORD PTR [rdx] + movd xmm0, rcx + mov rcx, QWORD PTR [r8+88] + xor rcx, QWORD PTR [r8+72] + mov r13, QWORD PTR [rdx+40] + mov rdi, QWORD PTR [rdx+224] + xor r13, QWORD PTR [rdx+8] + movaps XMMWORD PTR [rsp+160], xmm6 + movaps XMMWORD PTR [rsp+176], xmm7 + movaps XMMWORD PTR [rsp+192], xmm8 + movaps XMMWORD PTR [rsp+208], xmm9 + movaps XMMWORD PTR [rsp+224], xmm10 + movaps XMMWORD PTR [rsp+240], xmm11 + movaps XMMWORD PTR [rsp+256], xmm12 + movaps XMMWORD PTR [rsp+272], xmm13 + movaps XMMWORD PTR [rsp+288], xmm14 + movaps XMMWORD PTR [rsp+304], xmm15 + movd xmm7, rax + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + + movaps xmm1, XMMWORD PTR [rdx+96] + movaps xmm2, XMMWORD PTR [r8+96] + movaps XMMWORD PTR [rsp], xmm1 + movaps XMMWORD PTR [rsp+16], xmm2 + + mov r8d, r15d + punpcklqdq xmm7, xmm0 + movd xmm0, rcx + mov rcx, QWORD PTR [rdx+56] + xor rcx, QWORD PTR [rdx+24] + movd xmm9, rax + mov QWORD PTR [rsp+128], rsi + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + punpcklqdq xmm9, xmm0 + movd xmm0, rcx + mov rcx, QWORD PTR [rdx+88] + xor rcx, QWORD PTR [rdx+72] + movd xmm8, rax + mov QWORD PTR [rsp+136], rdi + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm8, xmm0 + and r8d, 2097136 + movd xmm0, rcx + mov r11d, 524288 + movd xmm10, rax + punpcklqdq xmm10, xmm0 + + movd xmm14, QWORD PTR [rsp+128] + movd xmm15, QWORD PTR [rsp+136] + + ALIGN(64) +CryptonightR_template_double_mainloop: + movdqu xmm6, XMMWORD PTR [rbx+rsi] + movd xmm0, r12 + mov ecx, ebx + movd xmm3, r14 + punpcklqdq xmm3, xmm0 + xor ebx, 16 + aesenc xmm6, xmm3 + movd xmm4, r15 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + pxor xmm6, xmm0 + xor ebx, 48 + paddq xmm0, xmm7 + movdqu xmm1, XMMWORD PTR [rbx+rsi] + pxor xmm6, xmm1 + movdqu XMMWORD PTR [rbx+rsi], xmm0 + paddq xmm1, xmm3 + xor ebx, 16 + mov eax, ebx + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + pxor xmm6, xmm0 + movd rdx, xmm6 + movdqu XMMWORD PTR [rbx+rsi], xmm1 + paddq xmm0, xmm9 + movdqu XMMWORD PTR [rax+rsi], xmm0 + movdqa xmm0, xmm6 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [rcx+rsi], xmm0 + mov esi, edx + movdqu xmm5, XMMWORD PTR [r8+rdi] + and esi, 2097136 + mov ecx, r8d + movd xmm0, r13 + punpcklqdq xmm4, xmm0 + xor r8d, 16 + aesenc xmm5, xmm4 + movdqu xmm0, XMMWORD PTR [r8+rdi] + pxor xmm5, xmm0 + xor r8d, 48 + paddq xmm0, xmm8 + movdqu xmm1, XMMWORD PTR [r8+rdi] + pxor xmm5, xmm1 + movdqu XMMWORD PTR [r8+rdi], xmm0 + paddq xmm1, xmm4 + xor r8d, 16 + mov eax, r8d + xor rax, 32 + movdqu xmm0, XMMWORD PTR [r8+rdi] + pxor xmm5, xmm0 + movdqu XMMWORD PTR [r8+rdi], xmm1 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rdi], xmm0 + movdqa xmm0, xmm5 + pxor xmm0, xmm8 + movdqu XMMWORD PTR [rcx+rdi], xmm0 + movd rdi, xmm5 + movd rcx, xmm14 + mov ebp, edi + mov r8, QWORD PTR [rcx+rsi] + mov r10, QWORD PTR [rcx+rsi+8] + lea r9, QWORD PTR [rcx+rsi] + xor esi, 16 + + movd xmm0, rsp + movd xmm1, rsi + movd xmm2, rdi + movd xmm11, rbp + movd xmm12, r15 + movd xmm13, rdx + mov [rsp+104], rcx + mov [rsp+112], r9 + + mov ebx, DWORD PTR [rsp+16] + mov esi, DWORD PTR [rsp+20] + mov edi, DWORD PTR [rsp+24] + mov ebp, DWORD PTR [rsp+28] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + xor r8, rax + + movd esp, xmm3 + pextrd r15d, xmm3, 2 + movd eax, xmm7 + movd edx, xmm9 + pextrd r9d, xmm9, 2 + +CryptonightR_template_double_part2: + + mov eax, edi + mov edx, ebp + shl rdx, 32 + or rax, rdx + xor r14, rax + + mov eax, ebx + mov edx, esi + shl rdx, 32 + or rax, rdx + xor r12, rax + + movd rsp, xmm0 + mov DWORD PTR [rsp+16], ebx + mov DWORD PTR [rsp+20], esi + mov DWORD PTR [rsp+24], edi + mov DWORD PTR [rsp+28], ebp + + movd rsi, xmm1 + movd rdi, xmm2 + movd rbp, xmm11 + movd r15, xmm12 + movd rdx, xmm13 + mov rcx, [rsp+104] + mov r9, [rsp+112] + + mov rbx, r8 + mov rax, r8 + mul rdx + and ebp, 2097136 + mov r8, rax + movdqu xmm1, XMMWORD PTR [rcx+rsi] + pxor xmm6, xmm1 + xor esi, 48 + paddq xmm1, xmm7 + movdqu xmm2, XMMWORD PTR [rsi+rcx] + pxor xmm6, xmm2 + paddq xmm2, xmm3 + movdqu XMMWORD PTR [rsi+rcx], xmm1 + xor esi, 16 + mov eax, esi + mov rsi, rcx + movdqu xmm0, XMMWORD PTR [rax+rcx] + pxor xmm6, xmm0 + movdqu XMMWORD PTR [rax+rcx], xmm2 + paddq xmm0, xmm9 + add r12, r8 + xor rax, 32 + add r14, rdx + movdqa xmm9, xmm7 + movdqa xmm7, xmm6 + movdqu XMMWORD PTR [rax+rcx], xmm0 + mov QWORD PTR [r9+8], r12 + xor r12, r10 + mov QWORD PTR [r9], r14 + movd rcx, xmm15 + xor r14, rbx + mov r10d, ebp + mov ebx, r14d + xor ebp, 16 + and ebx, 2097136 + mov r8, QWORD PTR [r10+rcx] + mov r9, QWORD PTR [r10+rcx+8] + + movd xmm0, rsp + movd xmm1, rbx + movd xmm2, rsi + movd xmm11, rdi + movd xmm12, rbp + movd xmm13, r15 + mov [rsp+104], rcx + mov [rsp+112], r9 + + mov ebx, DWORD PTR [rsp] + mov esi, DWORD PTR [rsp+4] + mov edi, DWORD PTR [rsp+8] + mov ebp, DWORD PTR [rsp+12] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + + xor r8, rax + movd xmm3, r8 + + movd esp, xmm4 + pextrd r15d, xmm4, 2 + movd eax, xmm8 + movd edx, xmm10 + pextrd r9d, xmm10, 2 + +CryptonightR_template_double_part3: + + movd r15, xmm13 + + mov eax, edi + mov edx, ebp + shl rdx, 32 + or rax, rdx + xor r15, rax + + mov eax, ebx + mov edx, esi + shl rdx, 32 + or rax, rdx + xor r13, rax + + movd rsp, xmm0 + mov DWORD PTR [rsp], ebx + mov DWORD PTR [rsp+4], esi + mov DWORD PTR [rsp+8], edi + mov DWORD PTR [rsp+12], ebp + + movd rbx, xmm1 + movd rsi, xmm2 + movd rdi, xmm11 + movd rbp, xmm12 + mov rcx, [rsp+104] + mov r9, [rsp+112] + + mov rax, r8 + mul rdi + mov rdi, rcx + mov r8, rax + movdqu xmm1, XMMWORD PTR [rbp+rcx] + pxor xmm5, xmm1 + xor ebp, 48 + paddq xmm1, xmm8 + add r13, r8 + movdqu xmm2, XMMWORD PTR [rbp+rcx] + pxor xmm5, xmm2 + add r15, rdx + movdqu XMMWORD PTR [rbp+rcx], xmm1 + paddq xmm2, xmm4 + xor ebp, 16 + mov eax, ebp + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbp+rcx] + pxor xmm5, xmm0 + movdqu XMMWORD PTR [rbp+rcx], xmm2 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rcx], xmm0 + movd rax, xmm3 + movdqa xmm10, xmm8 + mov QWORD PTR [r10+rcx], r15 + movdqa xmm8, xmm5 + xor r15, rax + mov QWORD PTR [r10+rcx+8], r13 + mov r8d, r15d + xor r13, r9 + and r8d, 2097136 + dec r11d + jnz CryptonightR_template_double_mainloop + +CryptonightR_template_double_part4: + + mov rbx, QWORD PTR [rsp+400] + movaps xmm6, XMMWORD PTR [rsp+160] + movaps xmm7, XMMWORD PTR [rsp+176] + movaps xmm8, XMMWORD PTR [rsp+192] + movaps xmm9, XMMWORD PTR [rsp+208] + movaps xmm10, XMMWORD PTR [rsp+224] + movaps xmm11, XMMWORD PTR [rsp+240] + movaps xmm12, XMMWORD PTR [rsp+256] + movaps xmm13, XMMWORD PTR [rsp+272] + movaps xmm14, XMMWORD PTR [rsp+288] + movaps xmm15, XMMWORD PTR [rsp+304] + add rsp, 320 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + ret 0 +CryptonightR_template_double_end: diff --git a/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightWOW_template.inc b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightWOW_template.inc new file mode 100644 index 000000000..7183a659f --- /dev/null +++ b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightWOW_template.inc @@ -0,0 +1,486 @@ +PUBLIC FN_PREFIX(CryptonightWOW_template_part1) +PUBLIC FN_PREFIX(CryptonightWOW_template_mainloop) +PUBLIC FN_PREFIX(CryptonightWOW_template_part2) +PUBLIC FN_PREFIX(CryptonightWOW_template_part3) +PUBLIC FN_PREFIX(CryptonightWOW_template_end) +PUBLIC FN_PREFIX(CryptonightWOW_template_double_part1) +PUBLIC FN_PREFIX(CryptonightWOW_template_double_mainloop) +PUBLIC FN_PREFIX(CryptonightWOW_template_double_part2) +PUBLIC FN_PREFIX(CryptonightWOW_template_double_part3) +PUBLIC FN_PREFIX(CryptonightWOW_template_double_part4) +PUBLIC FN_PREFIX(CryptonightWOW_template_double_end) + +ALIGN(64) +FN_PREFIX(CryptonightWOW_template_part1): + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push rdi + sub rsp, 64 + mov r12, rcx + mov r8, QWORD PTR [r12+32] + mov rdx, r12 + xor r8, QWORD PTR [r12] + mov r15, QWORD PTR [r12+40] + mov r9, r8 + xor r15, QWORD PTR [r12+8] + mov r11, QWORD PTR [r12+224] + mov r12, QWORD PTR [r12+56] + xor r12, QWORD PTR [rdx+24] + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + movaps XMMWORD PTR [rsp+48], xmm6 + movq xmm0, r12 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + movaps XMMWORD PTR [rsp], xmm9 + mov r12, QWORD PTR [rdx+88] + xor r12, QWORD PTR [rdx+72] + movq xmm6, rax + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm6, xmm0 + and r9d, 2097136 + movq xmm0, r12 + movq xmm7, rax + punpcklqdq xmm7, xmm0 + mov r10d, r9d + movq xmm9, rsp + mov rsp, r8 + mov r8d, 524288 + + mov ebx, [rdx+96] + mov esi, [rdx+100] + mov edi, [rdx+104] + mov ebp, [rdx+108] + + ALIGN(64) +FN_PREFIX(CryptonightWOW_template_mainloop): + movdqa xmm5, XMMWORD PTR [r9+r11] + movq xmm0, r15 + movq xmm4, rsp + punpcklqdq xmm4, xmm0 + lea rdx, QWORD PTR [r9+r11] + + aesenc xmm5, xmm4 + movd r10d, xmm5 + and r10d, 2097136 + + mov r12d, r9d + mov eax, r9d + xor r9d, 48 + xor r12d, 16 + xor eax, 32 + movdqu xmm0, XMMWORD PTR [r9+r11] + movdqu xmm2, XMMWORD PTR [r12+r11] + movdqu xmm1, XMMWORD PTR [rax+r11] + paddq xmm0, xmm7 + paddq xmm2, xmm6 + paddq xmm1, xmm4 + movdqu XMMWORD PTR [r12+r11], xmm0 + movq r12, xmm5 + movdqu XMMWORD PTR [rax+r11], xmm2 + movdqu XMMWORD PTR [r9+r11], xmm1 + + movdqa xmm0, xmm5 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [rdx], xmm0 + + lea r13d, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or r13, rdx + + xor r13, QWORD PTR [r10+r11] + mov r14, QWORD PTR [r10+r11+8] + + movd eax, xmm6 + movd edx, xmm7 + pextrd r9d, xmm7, 2 + +FN_PREFIX(CryptonightWOW_template_part2): + mov rax, r13 + mul r12 + movq xmm0, rax + movq xmm3, rdx + punpcklqdq xmm3, xmm0 + + mov r9d, r10d + mov r12d, r10d + xor r9d, 16 + xor r12d, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [r12+r11] + xor rdx, QWORD PTR [r12+r11] + xor rax, QWORD PTR [r11+r12+8] + movdqa xmm2, XMMWORD PTR [r9+r11] + pxor xmm3, xmm2 + paddq xmm7, XMMWORD PTR [r10+r11] + paddq xmm1, xmm4 + paddq xmm3, xmm6 + movdqu XMMWORD PTR [r9+r11], xmm7 + movdqu XMMWORD PTR [r12+r11], xmm3 + movdqu XMMWORD PTR [r10+r11], xmm1 + + movdqa xmm7, xmm6 + add r15, rax + add rsp, rdx + xor r10, 48 + mov QWORD PTR [r10+r11], rsp + xor rsp, r13 + mov r9d, esp + mov QWORD PTR [r10+r11+8], r15 + and r9d, 2097136 + xor r15, r14 + movdqa xmm6, xmm5 + dec r8d + jnz FN_PREFIX(CryptonightWOW_template_mainloop) + +FN_PREFIX(CryptonightWOW_template_part3): + movq rsp, xmm9 + + mov rbx, QWORD PTR [rsp+136] + mov rbp, QWORD PTR [rsp+144] + mov rsi, QWORD PTR [rsp+152] + movaps xmm6, XMMWORD PTR [rsp+48] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+16] + movaps xmm9, XMMWORD PTR [rsp] + add rsp, 64 + pop rdi + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + ret 0 +FN_PREFIX(CryptonightWOW_template_end): + +ALIGN(64) +FN_PREFIX(CryptonightWOW_template_double_part1): + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 320 + mov r14, QWORD PTR [rcx+32] + mov r8, rcx + xor r14, QWORD PTR [rcx] + mov r12, QWORD PTR [rcx+40] + mov ebx, r14d + mov rsi, QWORD PTR [rcx+224] + and ebx, 2097136 + xor r12, QWORD PTR [rcx+8] + mov rcx, QWORD PTR [rcx+56] + xor rcx, QWORD PTR [r8+24] + mov rax, QWORD PTR [r8+48] + xor rax, QWORD PTR [r8+16] + mov r15, QWORD PTR [rdx+32] + xor r15, QWORD PTR [rdx] + movq xmm0, rcx + mov rcx, QWORD PTR [r8+88] + xor rcx, QWORD PTR [r8+72] + mov r13, QWORD PTR [rdx+40] + mov rdi, QWORD PTR [rdx+224] + xor r13, QWORD PTR [rdx+8] + movaps XMMWORD PTR [rsp+160], xmm6 + movaps XMMWORD PTR [rsp+176], xmm7 + movaps XMMWORD PTR [rsp+192], xmm8 + movaps XMMWORD PTR [rsp+208], xmm9 + movaps XMMWORD PTR [rsp+224], xmm10 + movaps XMMWORD PTR [rsp+240], xmm11 + movaps XMMWORD PTR [rsp+256], xmm12 + movaps XMMWORD PTR [rsp+272], xmm13 + movaps XMMWORD PTR [rsp+288], xmm14 + movaps XMMWORD PTR [rsp+304], xmm15 + movq xmm7, rax + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + + movaps xmm1, XMMWORD PTR [rdx+96] + movaps xmm2, XMMWORD PTR [r8+96] + movaps XMMWORD PTR [rsp], xmm1 + movaps XMMWORD PTR [rsp+16], xmm2 + + mov r8d, r15d + punpcklqdq xmm7, xmm0 + movq xmm0, rcx + mov rcx, QWORD PTR [rdx+56] + xor rcx, QWORD PTR [rdx+24] + movq xmm9, rax + mov QWORD PTR [rsp+128], rsi + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + punpcklqdq xmm9, xmm0 + movq xmm0, rcx + mov rcx, QWORD PTR [rdx+88] + xor rcx, QWORD PTR [rdx+72] + movq xmm8, rax + mov QWORD PTR [rsp+136], rdi + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm8, xmm0 + and r8d, 2097136 + movq xmm0, rcx + mov r11d, 524288 + movq xmm10, rax + punpcklqdq xmm10, xmm0 + + movq xmm14, QWORD PTR [rsp+128] + movq xmm15, QWORD PTR [rsp+136] + + ALIGN(64) +FN_PREFIX(CryptonightWOW_template_double_mainloop): + movdqu xmm6, XMMWORD PTR [rbx+rsi] + movq xmm0, r12 + mov ecx, ebx + movq xmm3, r14 + punpcklqdq xmm3, xmm0 + xor ebx, 16 + aesenc xmm6, xmm3 + movq rdx, xmm6 + movq xmm4, r15 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + xor ebx, 48 + paddq xmm0, xmm7 + movdqu xmm1, XMMWORD PTR [rbx+rsi] + movdqu XMMWORD PTR [rbx+rsi], xmm0 + paddq xmm1, xmm3 + xor ebx, 16 + mov eax, ebx + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + movdqu XMMWORD PTR [rbx+rsi], xmm1 + paddq xmm0, xmm9 + movdqu XMMWORD PTR [rax+rsi], xmm0 + movdqa xmm0, xmm6 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [rcx+rsi], xmm0 + mov esi, edx + movdqu xmm5, XMMWORD PTR [r8+rdi] + and esi, 2097136 + mov ecx, r8d + movq xmm0, r13 + punpcklqdq xmm4, xmm0 + xor r8d, 16 + aesenc xmm5, xmm4 + movdqu xmm0, XMMWORD PTR [r8+rdi] + xor r8d, 48 + paddq xmm0, xmm8 + movdqu xmm1, XMMWORD PTR [r8+rdi] + movdqu XMMWORD PTR [r8+rdi], xmm0 + paddq xmm1, xmm4 + xor r8d, 16 + mov eax, r8d + xor rax, 32 + movdqu xmm0, XMMWORD PTR [r8+rdi] + movdqu XMMWORD PTR [r8+rdi], xmm1 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rdi], xmm0 + movdqa xmm0, xmm5 + pxor xmm0, xmm8 + movdqu XMMWORD PTR [rcx+rdi], xmm0 + movq rdi, xmm5 + movq rcx, xmm14 + mov ebp, edi + mov r8, QWORD PTR [rcx+rsi] + mov r10, QWORD PTR [rcx+rsi+8] + lea r9, QWORD PTR [rcx+rsi] + xor esi, 16 + + movq xmm0, rsp + movq xmm1, rsi + movq xmm2, rdi + movq xmm11, rbp + movq xmm12, r15 + movq xmm13, rdx + mov [rsp+104], rcx + mov [rsp+112], r9 + + mov ebx, DWORD PTR [rsp+16] + mov esi, DWORD PTR [rsp+20] + mov edi, DWORD PTR [rsp+24] + mov ebp, DWORD PTR [rsp+28] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + xor r8, rax + + movd esp, xmm3 + pextrd r15d, xmm3, 2 + movd eax, xmm7 + movd edx, xmm9 + pextrd r9d, xmm9, 2 + +FN_PREFIX(CryptonightWOW_template_double_part2): + + movq rsp, xmm0 + mov DWORD PTR [rsp+16], ebx + mov DWORD PTR [rsp+20], esi + mov DWORD PTR [rsp+24], edi + mov DWORD PTR [rsp+28], ebp + + movq rsi, xmm1 + movq rdi, xmm2 + movq rbp, xmm11 + movq r15, xmm12 + movq rdx, xmm13 + mov rcx, [rsp+104] + mov r9, [rsp+112] + + mov rbx, r8 + mov rax, r8 + mul rdx + and ebp, 2097136 + mov r8, rax + movq xmm1, rdx + movq xmm0, r8 + punpcklqdq xmm1, xmm0 + pxor xmm1, XMMWORD PTR [rcx+rsi] + xor esi, 48 + paddq xmm1, xmm7 + movdqu xmm2, XMMWORD PTR [rsi+rcx] + xor rdx, QWORD PTR [rsi+rcx] + paddq xmm2, xmm3 + xor r8, QWORD PTR [rsi+rcx+8] + movdqu XMMWORD PTR [rsi+rcx], xmm1 + xor esi, 16 + mov eax, esi + mov rsi, rcx + movdqu xmm0, XMMWORD PTR [rax+rcx] + movdqu XMMWORD PTR [rax+rcx], xmm2 + paddq xmm0, xmm9 + add r12, r8 + xor rax, 32 + add r14, rdx + movdqa xmm9, xmm7 + movdqa xmm7, xmm6 + movdqu XMMWORD PTR [rax+rcx], xmm0 + mov QWORD PTR [r9+8], r12 + xor r12, r10 + mov QWORD PTR [r9], r14 + movq rcx, xmm15 + xor r14, rbx + mov r10d, ebp + mov ebx, r14d + xor ebp, 16 + and ebx, 2097136 + mov r8, QWORD PTR [r10+rcx] + mov r9, QWORD PTR [r10+rcx+8] + + movq xmm0, rsp + movq xmm1, rbx + movq xmm2, rsi + movq xmm11, rdi + movq xmm12, rbp + movq xmm13, r15 + mov [rsp+104], rcx + mov [rsp+112], r9 + + mov ebx, DWORD PTR [rsp] + mov esi, DWORD PTR [rsp+4] + mov edi, DWORD PTR [rsp+8] + mov ebp, DWORD PTR [rsp+12] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + + xor r8, rax + movq xmm3, r8 + + movd esp, xmm4 + pextrd r15d, xmm4, 2 + movd eax, xmm8 + movd edx, xmm10 + pextrd r9d, xmm10, 2 + +FN_PREFIX(CryptonightWOW_template_double_part3): + + movq rsp, xmm0 + mov DWORD PTR [rsp], ebx + mov DWORD PTR [rsp+4], esi + mov DWORD PTR [rsp+8], edi + mov DWORD PTR [rsp+12], ebp + + movq rbx, xmm1 + movq rsi, xmm2 + movq rdi, xmm11 + movq rbp, xmm12 + movq r15, xmm13 + mov rcx, [rsp+104] + mov r9, [rsp+112] + + mov rax, r8 + mul rdi + movq xmm1, rdx + movq xmm0, rax + punpcklqdq xmm1, xmm0 + mov rdi, rcx + mov r8, rax + pxor xmm1, XMMWORD PTR [rbp+rcx] + xor ebp, 48 + paddq xmm1, xmm8 + xor r8, QWORD PTR [rbp+rcx+8] + xor rdx, QWORD PTR [rbp+rcx] + add r13, r8 + movdqu xmm2, XMMWORD PTR [rbp+rcx] + add r15, rdx + movdqu XMMWORD PTR [rbp+rcx], xmm1 + paddq xmm2, xmm4 + xor ebp, 16 + mov eax, ebp + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbp+rcx] + movdqu XMMWORD PTR [rbp+rcx], xmm2 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rcx], xmm0 + movq rax, xmm3 + movdqa xmm10, xmm8 + mov QWORD PTR [r10+rcx], r15 + movdqa xmm8, xmm5 + xor r15, rax + mov QWORD PTR [r10+rcx+8], r13 + mov r8d, r15d + xor r13, r9 + and r8d, 2097136 + dec r11d + jnz FN_PREFIX(CryptonightWOW_template_double_mainloop) + +FN_PREFIX(CryptonightWOW_template_double_part4): + + mov rbx, QWORD PTR [rsp+400] + movaps xmm6, XMMWORD PTR [rsp+160] + movaps xmm7, XMMWORD PTR [rsp+176] + movaps xmm8, XMMWORD PTR [rsp+192] + movaps xmm9, XMMWORD PTR [rsp+208] + movaps xmm10, XMMWORD PTR [rsp+224] + movaps xmm11, XMMWORD PTR [rsp+240] + movaps xmm12, XMMWORD PTR [rsp+256] + movaps xmm13, XMMWORD PTR [rsp+272] + movaps xmm14, XMMWORD PTR [rsp+288] + movaps xmm15, XMMWORD PTR [rsp+304] + add rsp, 320 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + ret 0 +FN_PREFIX(CryptonightWOW_template_double_end): diff --git a/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightWOW_template_win.inc b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightWOW_template_win.inc new file mode 100644 index 000000000..9db2cf397 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightWOW_template_win.inc @@ -0,0 +1,486 @@ +PUBLIC CryptonightWOW_template_part1 +PUBLIC CryptonightWOW_template_mainloop +PUBLIC CryptonightWOW_template_part2 +PUBLIC CryptonightWOW_template_part3 +PUBLIC CryptonightWOW_template_end +PUBLIC CryptonightWOW_template_double_part1 +PUBLIC CryptonightWOW_template_double_mainloop +PUBLIC CryptonightWOW_template_double_part2 +PUBLIC CryptonightWOW_template_double_part3 +PUBLIC CryptonightWOW_template_double_part4 +PUBLIC CryptonightWOW_template_double_end + +ALIGN(64) +CryptonightWOW_template_part1: + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push rdi + sub rsp, 64 + mov r12, rcx + mov r8, QWORD PTR [r12+32] + mov rdx, r12 + xor r8, QWORD PTR [r12] + mov r15, QWORD PTR [r12+40] + mov r9, r8 + xor r15, QWORD PTR [r12+8] + mov r11, QWORD PTR [r12+224] + mov r12, QWORD PTR [r12+56] + xor r12, QWORD PTR [rdx+24] + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + movaps XMMWORD PTR [rsp+48], xmm6 + movd xmm0, r12 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + movaps XMMWORD PTR [rsp], xmm9 + mov r12, QWORD PTR [rdx+88] + xor r12, QWORD PTR [rdx+72] + movd xmm6, rax + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm6, xmm0 + and r9d, 2097136 + movd xmm0, r12 + movd xmm7, rax + punpcklqdq xmm7, xmm0 + mov r10d, r9d + movd xmm9, rsp + mov rsp, r8 + mov r8d, 524288 + + mov ebx, [rdx+96] + mov esi, [rdx+100] + mov edi, [rdx+104] + mov ebp, [rdx+108] + + ALIGN(64) +CryptonightWOW_template_mainloop: + movdqa xmm5, XMMWORD PTR [r9+r11] + movd xmm0, r15 + movd xmm4, rsp + punpcklqdq xmm4, xmm0 + lea rdx, QWORD PTR [r9+r11] + + aesenc xmm5, xmm4 + movd r10d, xmm5 + and r10d, 2097136 + + mov r12d, r9d + mov eax, r9d + xor r9d, 48 + xor r12d, 16 + xor eax, 32 + movdqu xmm0, XMMWORD PTR [r9+r11] + movdqu xmm2, XMMWORD PTR [r12+r11] + movdqu xmm1, XMMWORD PTR [rax+r11] + paddq xmm0, xmm7 + paddq xmm2, xmm6 + paddq xmm1, xmm4 + movdqu XMMWORD PTR [r12+r11], xmm0 + movd r12, xmm5 + movdqu XMMWORD PTR [rax+r11], xmm2 + movdqu XMMWORD PTR [r9+r11], xmm1 + + movdqa xmm0, xmm5 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [rdx], xmm0 + + lea r13d, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or r13, rdx + + xor r13, QWORD PTR [r10+r11] + mov r14, QWORD PTR [r10+r11+8] + + movd eax, xmm6 + movd edx, xmm7 + pextrd r9d, xmm7, 2 + +CryptonightWOW_template_part2: + mov rax, r13 + mul r12 + movd xmm0, rax + movd xmm3, rdx + punpcklqdq xmm3, xmm0 + + mov r9d, r10d + mov r12d, r10d + xor r9d, 16 + xor r12d, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [r12+r11] + xor rdx, QWORD PTR [r12+r11] + xor rax, QWORD PTR [r11+r12+8] + movdqa xmm2, XMMWORD PTR [r9+r11] + pxor xmm3, xmm2 + paddq xmm7, XMMWORD PTR [r10+r11] + paddq xmm1, xmm4 + paddq xmm3, xmm6 + movdqu XMMWORD PTR [r9+r11], xmm7 + movdqu XMMWORD PTR [r12+r11], xmm3 + movdqu XMMWORD PTR [r10+r11], xmm1 + + movdqa xmm7, xmm6 + add r15, rax + add rsp, rdx + xor r10, 48 + mov QWORD PTR [r10+r11], rsp + xor rsp, r13 + mov r9d, esp + mov QWORD PTR [r10+r11+8], r15 + and r9d, 2097136 + xor r15, r14 + movdqa xmm6, xmm5 + dec r8d + jnz CryptonightWOW_template_mainloop + +CryptonightWOW_template_part3: + movd rsp, xmm9 + + mov rbx, QWORD PTR [rsp+136] + mov rbp, QWORD PTR [rsp+144] + mov rsi, QWORD PTR [rsp+152] + movaps xmm6, XMMWORD PTR [rsp+48] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+16] + movaps xmm9, XMMWORD PTR [rsp] + add rsp, 64 + pop rdi + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + ret 0 +CryptonightWOW_template_end: + +ALIGN(64) +CryptonightWOW_template_double_part1: + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 320 + mov r14, QWORD PTR [rcx+32] + mov r8, rcx + xor r14, QWORD PTR [rcx] + mov r12, QWORD PTR [rcx+40] + mov ebx, r14d + mov rsi, QWORD PTR [rcx+224] + and ebx, 2097136 + xor r12, QWORD PTR [rcx+8] + mov rcx, QWORD PTR [rcx+56] + xor rcx, QWORD PTR [r8+24] + mov rax, QWORD PTR [r8+48] + xor rax, QWORD PTR [r8+16] + mov r15, QWORD PTR [rdx+32] + xor r15, QWORD PTR [rdx] + movd xmm0, rcx + mov rcx, QWORD PTR [r8+88] + xor rcx, QWORD PTR [r8+72] + mov r13, QWORD PTR [rdx+40] + mov rdi, QWORD PTR [rdx+224] + xor r13, QWORD PTR [rdx+8] + movaps XMMWORD PTR [rsp+160], xmm6 + movaps XMMWORD PTR [rsp+176], xmm7 + movaps XMMWORD PTR [rsp+192], xmm8 + movaps XMMWORD PTR [rsp+208], xmm9 + movaps XMMWORD PTR [rsp+224], xmm10 + movaps XMMWORD PTR [rsp+240], xmm11 + movaps XMMWORD PTR [rsp+256], xmm12 + movaps XMMWORD PTR [rsp+272], xmm13 + movaps XMMWORD PTR [rsp+288], xmm14 + movaps XMMWORD PTR [rsp+304], xmm15 + movd xmm7, rax + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + + movaps xmm1, XMMWORD PTR [rdx+96] + movaps xmm2, XMMWORD PTR [r8+96] + movaps XMMWORD PTR [rsp], xmm1 + movaps XMMWORD PTR [rsp+16], xmm2 + + mov r8d, r15d + punpcklqdq xmm7, xmm0 + movd xmm0, rcx + mov rcx, QWORD PTR [rdx+56] + xor rcx, QWORD PTR [rdx+24] + movd xmm9, rax + mov QWORD PTR [rsp+128], rsi + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + punpcklqdq xmm9, xmm0 + movd xmm0, rcx + mov rcx, QWORD PTR [rdx+88] + xor rcx, QWORD PTR [rdx+72] + movd xmm8, rax + mov QWORD PTR [rsp+136], rdi + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm8, xmm0 + and r8d, 2097136 + movd xmm0, rcx + mov r11d, 524288 + movd xmm10, rax + punpcklqdq xmm10, xmm0 + + movd xmm14, QWORD PTR [rsp+128] + movd xmm15, QWORD PTR [rsp+136] + + ALIGN(64) +CryptonightWOW_template_double_mainloop: + movdqu xmm6, XMMWORD PTR [rbx+rsi] + movd xmm0, r12 + mov ecx, ebx + movd xmm3, r14 + punpcklqdq xmm3, xmm0 + xor ebx, 16 + aesenc xmm6, xmm3 + movd rdx, xmm6 + movd xmm4, r15 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + xor ebx, 48 + paddq xmm0, xmm7 + movdqu xmm1, XMMWORD PTR [rbx+rsi] + movdqu XMMWORD PTR [rbx+rsi], xmm0 + paddq xmm1, xmm3 + xor ebx, 16 + mov eax, ebx + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + movdqu XMMWORD PTR [rbx+rsi], xmm1 + paddq xmm0, xmm9 + movdqu XMMWORD PTR [rax+rsi], xmm0 + movdqa xmm0, xmm6 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [rcx+rsi], xmm0 + mov esi, edx + movdqu xmm5, XMMWORD PTR [r8+rdi] + and esi, 2097136 + mov ecx, r8d + movd xmm0, r13 + punpcklqdq xmm4, xmm0 + xor r8d, 16 + aesenc xmm5, xmm4 + movdqu xmm0, XMMWORD PTR [r8+rdi] + xor r8d, 48 + paddq xmm0, xmm8 + movdqu xmm1, XMMWORD PTR [r8+rdi] + movdqu XMMWORD PTR [r8+rdi], xmm0 + paddq xmm1, xmm4 + xor r8d, 16 + mov eax, r8d + xor rax, 32 + movdqu xmm0, XMMWORD PTR [r8+rdi] + movdqu XMMWORD PTR [r8+rdi], xmm1 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rdi], xmm0 + movdqa xmm0, xmm5 + pxor xmm0, xmm8 + movdqu XMMWORD PTR [rcx+rdi], xmm0 + movd rdi, xmm5 + movd rcx, xmm14 + mov ebp, edi + mov r8, QWORD PTR [rcx+rsi] + mov r10, QWORD PTR [rcx+rsi+8] + lea r9, QWORD PTR [rcx+rsi] + xor esi, 16 + + movd xmm0, rsp + movd xmm1, rsi + movd xmm2, rdi + movd xmm11, rbp + movd xmm12, r15 + movd xmm13, rdx + mov [rsp+104], rcx + mov [rsp+112], r9 + + mov ebx, DWORD PTR [rsp+16] + mov esi, DWORD PTR [rsp+20] + mov edi, DWORD PTR [rsp+24] + mov ebp, DWORD PTR [rsp+28] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + xor r8, rax + + movd esp, xmm3 + pextrd r15d, xmm3, 2 + movd eax, xmm7 + movd edx, xmm9 + pextrd r9d, xmm9, 2 + +CryptonightWOW_template_double_part2: + + movd rsp, xmm0 + mov DWORD PTR [rsp+16], ebx + mov DWORD PTR [rsp+20], esi + mov DWORD PTR [rsp+24], edi + mov DWORD PTR [rsp+28], ebp + + movd rsi, xmm1 + movd rdi, xmm2 + movd rbp, xmm11 + movd r15, xmm12 + movd rdx, xmm13 + mov rcx, [rsp+104] + mov r9, [rsp+112] + + mov rbx, r8 + mov rax, r8 + mul rdx + and ebp, 2097136 + mov r8, rax + movd xmm1, rdx + movd xmm0, r8 + punpcklqdq xmm1, xmm0 + pxor xmm1, XMMWORD PTR [rcx+rsi] + xor esi, 48 + paddq xmm1, xmm7 + movdqu xmm2, XMMWORD PTR [rsi+rcx] + xor rdx, QWORD PTR [rsi+rcx] + paddq xmm2, xmm3 + xor r8, QWORD PTR [rsi+rcx+8] + movdqu XMMWORD PTR [rsi+rcx], xmm1 + xor esi, 16 + mov eax, esi + mov rsi, rcx + movdqu xmm0, XMMWORD PTR [rax+rcx] + movdqu XMMWORD PTR [rax+rcx], xmm2 + paddq xmm0, xmm9 + add r12, r8 + xor rax, 32 + add r14, rdx + movdqa xmm9, xmm7 + movdqa xmm7, xmm6 + movdqu XMMWORD PTR [rax+rcx], xmm0 + mov QWORD PTR [r9+8], r12 + xor r12, r10 + mov QWORD PTR [r9], r14 + movd rcx, xmm15 + xor r14, rbx + mov r10d, ebp + mov ebx, r14d + xor ebp, 16 + and ebx, 2097136 + mov r8, QWORD PTR [r10+rcx] + mov r9, QWORD PTR [r10+rcx+8] + + movd xmm0, rsp + movd xmm1, rbx + movd xmm2, rsi + movd xmm11, rdi + movd xmm12, rbp + movd xmm13, r15 + mov [rsp+104], rcx + mov [rsp+112], r9 + + mov ebx, DWORD PTR [rsp] + mov esi, DWORD PTR [rsp+4] + mov edi, DWORD PTR [rsp+8] + mov ebp, DWORD PTR [rsp+12] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + + xor r8, rax + movd xmm3, r8 + + movd esp, xmm4 + pextrd r15d, xmm4, 2 + movd eax, xmm8 + movd edx, xmm10 + pextrd r9d, xmm10, 2 + +CryptonightWOW_template_double_part3: + + movd rsp, xmm0 + mov DWORD PTR [rsp], ebx + mov DWORD PTR [rsp+4], esi + mov DWORD PTR [rsp+8], edi + mov DWORD PTR [rsp+12], ebp + + movd rbx, xmm1 + movd rsi, xmm2 + movd rdi, xmm11 + movd rbp, xmm12 + movd r15, xmm13 + mov rcx, [rsp+104] + mov r9, [rsp+112] + + mov rax, r8 + mul rdi + movd xmm1, rdx + movd xmm0, rax + punpcklqdq xmm1, xmm0 + mov rdi, rcx + mov r8, rax + pxor xmm1, XMMWORD PTR [rbp+rcx] + xor ebp, 48 + paddq xmm1, xmm8 + xor r8, QWORD PTR [rbp+rcx+8] + xor rdx, QWORD PTR [rbp+rcx] + add r13, r8 + movdqu xmm2, XMMWORD PTR [rbp+rcx] + add r15, rdx + movdqu XMMWORD PTR [rbp+rcx], xmm1 + paddq xmm2, xmm4 + xor ebp, 16 + mov eax, ebp + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbp+rcx] + movdqu XMMWORD PTR [rbp+rcx], xmm2 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rcx], xmm0 + movd rax, xmm3 + movdqa xmm10, xmm8 + mov QWORD PTR [r10+rcx], r15 + movdqa xmm8, xmm5 + xor r15, rax + mov QWORD PTR [r10+rcx+8], r13 + mov r8d, r15d + xor r13, r9 + and r8d, 2097136 + dec r11d + jnz CryptonightWOW_template_double_mainloop + +CryptonightWOW_template_double_part4: + + mov rbx, QWORD PTR [rsp+400] + movaps xmm6, XMMWORD PTR [rsp+160] + movaps xmm7, XMMWORD PTR [rsp+176] + movaps xmm8, XMMWORD PTR [rsp+192] + movaps xmm9, XMMWORD PTR [rsp+208] + movaps xmm10, XMMWORD PTR [rsp+224] + movaps xmm11, XMMWORD PTR [rsp+240] + movaps xmm12, XMMWORD PTR [rsp+256] + movaps xmm13, XMMWORD PTR [rsp+272] + movaps xmm14, XMMWORD PTR [rsp+288] + movaps xmm15, XMMWORD PTR [rsp+304] + add rsp, 320 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + ret 0 +CryptonightWOW_template_double_end: diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_double_main_loop_sandybridge_linux.inc b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_double_main_loop_sandybridge_linux.inc index 79adab671..9b168c9e6 100644 --- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_double_main_loop_sandybridge_linux.inc +++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_double_main_loop_sandybridge_linux.inc @@ -94,7 +94,7 @@ lea r9, QWORD PTR [rdx+r13] movdqu xmm15, XMMWORD PTR [r9] -ALIGN 16 +ALIGN(64) main_loop_double_sandybridge: movdqu xmm9, xmm15 mov eax, edx @@ -407,4 +407,4 @@ sqrt_fix_2_sandybridge: punpcklqdq xmm5, xmm0 jmp sqrt_fix_2_ret_sandybridge -cnv2_double_mainloop_asm_sandybridge_endp: +cnv2_double_mainloop_asm_sandybridge_endp: \ No newline at end of file diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_double_main_loop_sandybridge_win64.inc b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_double_main_loop_sandybridge_win64.inc index ad8f18233..0450ba3ea 100644 --- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_double_main_loop_sandybridge_win64.inc +++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_double_main_loop_sandybridge_win64.inc @@ -94,7 +94,7 @@ lea r9, QWORD PTR [rdx+r13] movdqu xmm15, XMMWORD PTR [r9] - ALIGN 64 + ALIGN(64) main_loop_double_sandybridge: movdqu xmm9, xmm15 mov eax, edx diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S index c0a3d0b41..9cb61bdd9 100644 --- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S +++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S @@ -1,4 +1,8 @@ -#define ALIGN .align +#ifdef __APPLE__ +# define ALIGN(x) .align 6 +#else +# define ALIGN(x) .align x +#endif .intel_syntax noprefix #ifdef __APPLE__ # define FN_PREFIX(fn) _ ## fn @@ -11,23 +15,25 @@ .global FN_PREFIX(cryptonight_v8_mainloop_ryzen_asm) .global FN_PREFIX(cryptonight_v8_double_mainloop_sandybridge_asm) -ALIGN 8 +ALIGN(64) FN_PREFIX(cryptonight_v8_mainloop_ivybridge_asm): sub rsp, 48 mov rcx, rdi #include "cryptonight_v8_main_loop_ivybridge_linux.inc" add rsp, 48 ret 0 + mov eax, 3735929054 -ALIGN 8 +ALIGN(64) FN_PREFIX(cryptonight_v8_mainloop_ryzen_asm): sub rsp, 48 mov rcx, rdi #include "cryptonight_v8_main_loop_ryzen_linux.inc" add rsp, 48 ret 0 + mov eax, 3735929054 -ALIGN 16 +ALIGN(64) FN_PREFIX(cryptonight_v8_double_mainloop_sandybridge_asm): sub rsp, 48 mov rcx, rdi @@ -35,3 +41,4 @@ FN_PREFIX(cryptonight_v8_double_mainloop_sandybridge_asm): #include "cryptonight_v8_double_main_loop_sandybridge_linux.inc" add rsp, 48 ret 0 + mov eax, 3735929054 \ No newline at end of file diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm index 1f3d2e15c..f744a1de2 100644 --- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm +++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm @@ -3,23 +3,26 @@ PUBLIC cryptonight_v8_mainloop_ivybridge_asm PUBLIC cryptonight_v8_mainloop_ryzen_asm PUBLIC cryptonight_v8_double_mainloop_sandybridge_asm -ALIGN 8 +ALIGN(64) cryptonight_v8_mainloop_ivybridge_asm PROC INCLUDE cryptonight_v8_main_loop_ivybridge_win64.inc ret 0 + mov eax, 3735929054 cryptonight_v8_mainloop_ivybridge_asm ENDP -ALIGN 8 +ALIGN(64) cryptonight_v8_mainloop_ryzen_asm PROC INCLUDE cryptonight_v8_main_loop_ryzen_win64.inc ret 0 + mov eax, 3735929054 cryptonight_v8_mainloop_ryzen_asm ENDP -ALIGN 8 +ALIGN(64) cryptonight_v8_double_mainloop_sandybridge_asm PROC INCLUDE cryptonight_v8_double_main_loop_sandybridge_win64.inc ret 0 + mov eax, 3735929054 cryptonight_v8_double_mainloop_sandybridge_asm ENDP _TEXT_CNV8_MAINLOOP ENDS -END +END \ No newline at end of file diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc index cbe43b0d3..4bba454ab 100644 --- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc +++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc @@ -50,7 +50,7 @@ punpcklqdq xmm5, xmm0 movdqu xmm6, XMMWORD PTR [r10+rbx] - ALIGN 8 + ALIGN(64) main_loop_ivybridge: lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_win64.inc b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_win64.inc index 8d49c5db7..8a2d92d7e 100755 --- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_win64.inc +++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_win64.inc @@ -50,7 +50,7 @@ punpcklqdq xmm5, xmm0 movdqu xmm6, XMMWORD PTR [r10+rbx] - ALIGN 8 + ALIGN(64) main_loop_ivybridge: lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc index cd8b43477..5dbf5917f 100644 --- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc +++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc @@ -45,7 +45,7 @@ movq xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 8 + ALIGN(64) main_loop_ryzen: movdqa xmm5, XMMWORD PTR [r10+rbx] movq xmm0, r11 diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_win64.inc b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_win64.inc index d103cc2ee..7e5c127f8 100755 --- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_win64.inc +++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_win64.inc @@ -45,7 +45,7 @@ movd xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 8 + ALIGN(64) main_loop_ryzen: movdqa xmm5, XMMWORD PTR [r10+rbx] movd xmm0, r11 diff --git a/xmrstak/backend/cpu/crypto/cryptonight.h b/xmrstak/backend/cpu/crypto/cryptonight.h index a7c77cdac..bd0c4967e 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight.h +++ b/xmrstak/backend/cpu/crypto/cryptonight.h @@ -4,6 +4,20 @@ #include "variant4_random_math.h" +#if defined _MSC_VER +#define ABI_ATTRIBUTE +#else +#define ABI_ATTRIBUTE __attribute__((ms_abi)) +#endif + +struct cryptonight_ctx; + +typedef void (*cn_mainloop_fun)(cryptonight_ctx *ctx); +typedef void (*cn_double_mainloop_fun)(cryptonight_ctx*, cryptonight_ctx*); +typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**, const xmrstak_algo&); + +void v4_compile_code(cryptonight_ctx* ctx, int code_size); + struct extra_ctx_r { uint64_t height = 0; @@ -16,7 +30,17 @@ struct cryptonight_ctx uint8_t hash_state[224]; // Need only 200, explicit align uint8_t* long_state; uint8_t ctx_info[24]; //Use some of the extra memory for flags - extra_ctx_r cn_r_ctx; + cn_mainloop_fun loop_fn = nullptr; + cn_hash_fun hash_fn = nullptr; + uint8_t* fun_data = nullptr; + int asm_version = 0; + xmrstak_algo last_algo = invalid_algo; + + union + { + extra_ctx_r cn_r_ctx; + }; + }; struct alloc_msg diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 43f719873..71c3d4101 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -24,6 +24,15 @@ #include #include +#ifdef _WIN64 +# include +# include +# include +# include +#else +# include +#endif + #ifdef __GNUC__ #include static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi) @@ -1056,37 +1065,46 @@ struct Cryptonight_hash<5> } }; -extern "C" void cryptonight_v8_mainloop_ivybridge_asm(cryptonight_ctx* ctx0); -extern "C" void cryptonight_v8_mainloop_ryzen_asm(cryptonight_ctx* ctx0); +extern "C" void cryptonight_v8_mainloop_ivybridge_asm(cryptonight_ctx* ctx0); +extern "C" void cryptonight_v8_mainloop_ryzen_asm(cryptonight_ctx* ctx0); extern "C" void cryptonight_v8_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1); template< size_t N, size_t asm_version> -struct Cryptonight_hash_asm; - -template -struct Cryptonight_hash_asm<1, asm_version> +struct Cryptonight_hash_asm { - static constexpr size_t N = 1; - template static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx, const xmrstak_algo& algo) { - keccak((const uint8_t *)input, len, ctx[0]->hash_state, 200); - cn_explode_scratchpad((__m128i*)ctx[0]->hash_state, (__m128i*)ctx[0]->long_state, algo); + for(size_t i = 0; i < N; ++i) + { + keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200); + cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state, algo); + } - if(asm_version == 0) - cryptonight_v8_mainloop_ivybridge_asm(ctx[0]); - else if(asm_version == 1) - cryptonight_v8_mainloop_ryzen_asm(ctx[0]); + if(ALGO == cryptonight_r) + { + // API ATTRIBUTE is only required for cryptonight_r + typedef void ABI_ATTRIBUTE (*cn_r_mainloop_fun)(cryptonight_ctx *ctx); + for(size_t i = 0; i < N; ++i) + reinterpret_cast(ctx[0]->loop_fn)(ctx[i]); // use always loop_fn from ctx[0]!! + } + else + { + for(size_t i = 0; i < N; ++i) + ctx[0]->loop_fn(ctx[i]); // use always loop_fn from ctx[0]!! + } - cn_implode_scratchpad((__m128i*)ctx[0]->long_state, (__m128i*)ctx[0]->hash_state, algo); - keccakf((uint64_t*)ctx[0]->hash_state, 24); - extra_hashes[ctx[0]->hash_state[0] & 3](ctx[0]->hash_state, 200, (char*)output); + for(size_t i = 0; i < N; ++i) + { + cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state, algo); + keccakf((uint64_t*)ctx[i]->hash_state, 24); + extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i); + } } }; -// double hash only for intel +// double hash with specialized asm only for intel template< > struct Cryptonight_hash_asm<2, 0> { @@ -1104,7 +1122,7 @@ struct Cryptonight_hash_asm<2, 0> cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state, algo); } - cryptonight_v8_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + reinterpret_cast(ctx[0]->loop_fn)(ctx[0], ctx[1]); for(size_t i = 0; i < N; ++i) { @@ -1117,6 +1135,141 @@ struct Cryptonight_hash_asm<2, 0> } }; +namespace +{ + +template +static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask) +{ + const uint8_t* p = reinterpret_cast(src); + + // Workaround for Visual Studio placing trampoline in debug builds. +# if defined(_MSC_VER) + if (p[0] == 0xE9) { + p += *(int32_t*)(p + 1) + 5; + } +# endif + + size_t size = 0; + while (*(uint32_t*)(p + size) != 0xDEADC0DE) { + ++size; + } + size += sizeof(uint32_t); + + memcpy((void*) dst, (const void*) src, size); + + uint8_t* patched_data = reinterpret_cast(dst); + for (size_t i = 0; i + sizeof(uint32_t) <= size; ++i) { + switch (*(uint32_t*)(patched_data + i)) { + case CN_ITER: + *(uint32_t*)(patched_data + i) = iterations; + break; + + case CN_MASK: + *(uint32_t*)(patched_data + i) = mask; + break; + } + } +} + + +void* allocateExecutableMemory(size_t size) +{ + +#ifdef _WIN64 +return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); +#else +# if defined(__APPLE__) + return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); +# else + return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +# endif +#endif +} + + +void protectExecutableMemory(void *p, size_t size) +{ +#ifdef _WIN64 + DWORD oldProtect; + VirtualProtect(p, size, PAGE_EXECUTE_READ, &oldProtect); +#else + mprotect(p, size, PROT_READ | PROT_EXEC); +#endif +} + +void unprotectExecutableMemory(void *p, size_t size) +{ +#ifdef _WIN64 + DWORD oldProtect; + VirtualProtect(p, size, PAGE_EXECUTE_READWRITE, &oldProtect); +#else + mprotect(p, size, PROT_WRITE | PROT_EXEC); +#endif +} + + +void flushInstructionCache(void *p, size_t size) +{ +#ifdef _WIN64 + ::FlushInstructionCache(GetCurrentProcess(), p, size); +#else +# ifndef __FreeBSD__ + __builtin___clear_cache(reinterpret_cast(p), reinterpret_cast(p) + size); +# endif +#endif +} + +template +void patchAsmVariants(std::string selected_asm, cryptonight_ctx** ctx, const xmrstak_algo& algo) +{ + const uint32_t Iter = algo.Iter(); + const uint32_t Mask = algo.Mask(); + + const int allocation_size = 65536; + + if(ctx[0]->fun_data == nullptr) + ctx[0]->fun_data = static_cast(allocateExecutableMemory(allocation_size)); + else + unprotectExecutableMemory(ctx[0]->fun_data, allocation_size); + + cn_mainloop_fun src_code = nullptr; + + if(selected_asm == "intel_avx") + { + // Intel Ivy Bridge (Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx) + if(N == 2) + src_code = reinterpret_cast(cryptonight_v8_double_mainloop_sandybridge_asm); + else + src_code = cryptonight_v8_mainloop_ivybridge_asm;; + } + // supports only 1 thread per hash + if(selected_asm == "amd_avx") + { + // AMD Ryzen (1xxx and 2xxx series) + src_code = cryptonight_v8_mainloop_ryzen_asm; + } + + if(src_code != nullptr && ctx[0]->fun_data != nullptr) + { + patchCode(ctx[0]->fun_data, src_code, Iter, Mask); + ctx[0]->loop_fn = reinterpret_cast(ctx[0]->fun_data); + for(size_t i = 1; i < N; ++i) + ctx[i]->loop_fn = ctx[0]->loop_fn; + + if(selected_asm == "intel_avx" && N == 2) + ctx[0]->hash_fn = Cryptonight_hash_asm<2u, 0u>::template hash; + else + ctx[0]->hash_fn = Cryptonight_hash_asm::template hash; + + protectExecutableMemory(ctx[0]->fun_data, allocation_size); + flushInstructionCache(ctx[0]->fun_data, allocation_size); + } +} +} // namespace (anonymous) + + + struct Cryptonight_hash_gpu { static constexpr size_t N = 1; @@ -1144,13 +1297,24 @@ struct Cryptonight_R_generator template static void cn_on_new_job(const xmrstak::miner_work& work, cryptonight_ctx** ctx) { - if(ctx[0]->cn_r_ctx.height == work.iBlockHeight) + if(ctx[0]->cn_r_ctx.height == work.iBlockHeight && ctx[0]->last_algo == POW(cryptonight_r)) return; + ctx[0]->last_algo = POW(cryptonight_r); + ctx[0]->cn_r_ctx.height = work.iBlockHeight; - v4_random_math_init(ctx[0]->cn_r_ctx.code, work.iBlockHeight); + int code_size = v4_random_math_init(ctx[0]->cn_r_ctx.code, work.iBlockHeight); + if(ctx[0]->asm_version != 0) + { + v4_compile_code(ctx[0], code_size); + ctx[0]->hash_fn = Cryptonight_hash_asm::template hash; + } for(size_t i=1; i < N; i++) + { ctx[i]->cn_r_ctx = ctx[0]->cn_r_ctx; + ctx[i]->loop_fn = ctx[0]->loop_fn; + ctx[i]->hash_fn = ctx[0]->hash_fn; + } } }; diff --git a/xmrstak/backend/cpu/crypto/variant4_random_math.h b/xmrstak/backend/cpu/crypto/variant4_random_math.h index 07dd3cf61..50228adf2 100644 --- a/xmrstak/backend/cpu/crypto/variant4_random_math.h +++ b/xmrstak/backend/cpu/crypto/variant4_random_math.h @@ -2,6 +2,7 @@ #include #include "../../cryptonight.hpp" +#include "xmrstak/misc/console.hpp" extern "C" { @@ -190,6 +191,7 @@ static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed template static int v4_random_math_init(struct V4_Instruction* code, const uint64_t height) { + printer::inst()->print_msg(LDEBUG, "CryptonightR create random math for block %llu", height); // MUL is 3 cycles, 3-way addition and rotations are 2 cycles, SUB/XOR are 1 cycle // These latencies match real-life instruction latencies for Intel CPUs starting from Sandy Bridge and up to Skylake/Coffee lake // diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 064b07339..8d67a73d1 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -158,12 +158,28 @@ cryptonight_ctx* minethd::minethd_alloc_ctx() ctx = cryptonight_alloc_ctx(1, 1, &msg); if (ctx == NULL) printer::inst()->print_msg(L0, "MEMORY ALLOC FAILED: %s", msg.warning); + else + { + ctx->hash_fn = nullptr; + ctx->loop_fn = nullptr; + ctx->fun_data = nullptr; + ctx->asm_version = 0; + ctx->last_algo = invalid_algo; + } return ctx; case ::jconf::no_mlck: ctx = cryptonight_alloc_ctx(1, 0, &msg); if (ctx == NULL) printer::inst()->print_msg(L0, "MEMORY ALLOC FAILED: %s", msg.warning); + else + { + ctx->hash_fn = nullptr; + ctx->loop_fn = nullptr; + ctx->fun_data = nullptr; + ctx->asm_version = 0; + ctx->last_algo = invalid_algo; + } return ctx; case ::jconf::print_warning: @@ -172,10 +188,27 @@ cryptonight_ctx* minethd::minethd_alloc_ctx() printer::inst()->print_msg(L0, "MEMORY ALLOC FAILED: %s", msg.warning); if (ctx == NULL) ctx = cryptonight_alloc_ctx(0, 0, NULL); + + if (ctx != NULL) + { + ctx->hash_fn = nullptr; + ctx->loop_fn = nullptr; + ctx->fun_data = nullptr; + ctx->asm_version = 0; + ctx->last_algo = invalid_algo; + } return ctx; case ::jconf::always_use: - return cryptonight_alloc_ctx(0, 0, NULL); + ctx = cryptonight_alloc_ctx(0, 0, NULL); + + ctx->hash_fn = nullptr; + ctx->loop_fn = nullptr; + ctx->fun_data = nullptr; + ctx->asm_version = 0; + ctx->last_algo = invalid_algo; + + return ctx; case ::jconf::unknown_value: return NULL; //Shut up compiler @@ -237,8 +270,6 @@ bool minethd::self_test() bool bResult = true; unsigned char out[32 * MAX_N]; - cn_hash_fun hashf; - cn_hash_fun hashf_multi; auto neededAlgorithms = ::jconf::inst()->GetCurrentCoinSelection().GetAllAlgorithms(); @@ -246,41 +277,40 @@ bool minethd::self_test() { if(algo == POW(cryptonight)) { - std::cout<HaveHardwareAes(), false, algo); - hashf("This is a test", 14, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test", 14, out, ctx, algo); bResult = bResult && memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; minethd::cn_on_new_job dm; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("This is a test", 14, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test", 14, out, ctx, algo); bResult = bResult && memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; - func_multi_selector<2>(hashf_multi, dm, ::jconf::inst()->HaveHardwareAes(), false, algo); - hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx, algo); + func_multi_selector<2>(ctx, dm, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx, algo); bResult = bResult && memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59" "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0; - func_multi_selector<2>(hashf_multi, dm, ::jconf::inst()->HaveHardwareAes(), true, algo); - hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx, algo); + func_multi_selector<2>(ctx, dm, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx, algo); bResult = bResult && memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59" "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0; - func_multi_selector<3>(hashf_multi, dm, ::jconf::inst()->HaveHardwareAes(), false, algo); - hashf_multi("This is a testThis is a testThis is a test", 14, out, ctx, algo); + func_multi_selector<3>(ctx, dm, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a testThis is a testThis is a test", 14, out, ctx, algo); bResult = bResult && memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 96) == 0; - func_multi_selector<4>(hashf_multi, dm, ::jconf::inst()->HaveHardwareAes(), false, algo); - hashf_multi("This is a testThis is a testThis is a testThis is a test", 14, out, ctx, algo); + func_multi_selector<4>(ctx, dm, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a testThis is a testThis is a testThis is a test", 14, out, ctx, algo); bResult = bResult && memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 128) == 0; - func_multi_selector<5>(hashf_multi, dm, ::jconf::inst()->HaveHardwareAes(), false, algo); - hashf_multi("This is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx, algo); + func_multi_selector<5>(ctx, dm, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx, algo); bResult = bResult && memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" @@ -289,92 +319,92 @@ bool minethd::self_test() } else if(algo == POW(cryptonight_lite)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\x5a\x24\xa0\x29\xde\x1c\x39\x3f\x3d\x52\x7a\x2f\x9b\x39\xdc\x3d\xb3\xbc\x87\x11\x8b\x84\x52\x9b\x9f\x0\x88\x49\x25\x4b\x5\xce", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\x5a\x24\xa0\x29\xde\x1c\x39\x3f\x3d\x52\x7a\x2f\x9b\x39\xdc\x3d\xb3\xbc\x87\x11\x8b\x84\x52\x9b\x9f\x0\x88\x49\x25\x4b\x5\xce", 32) == 0; } else if(algo == POW(cryptonight_monero)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\x1\x57\xc5\xee\x18\x8b\xbe\xc8\x97\x52\x85\xa3\x6\x4e\xe9\x20\x65\x21\x76\x72\xfd\x69\xa1\xae\xbd\x7\x66\xc7\xb5\x6e\xe0\xbd", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\x1\x57\xc5\xee\x18\x8b\xbe\xc8\x97\x52\x85\xa3\x6\x4e\xe9\x20\x65\x21\x76\x72\xfd\x69\xa1\xae\xbd\x7\x66\xc7\xb5\x6e\xe0\xbd", 32) == 0; } else if(algo == POW(cryptonight_monero_v8)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = memcmp(out, "\x35\x3f\xdc\x06\x8f\xd4\x7b\x03\xc0\x4b\x94\x31\xe0\x05\xe0\x0b\x68\xc2\x16\x8a\x3c\xc7\x33\x5c\x8b\x9b\x30\x81\x56\x59\x1a\x4f", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult &= memcmp(out, "\x35\x3f\xdc\x06\x8f\xd4\x7b\x03\xc0\x4b\x94\x31\xe0\x05\xe0\x0b\x68\xc2\x16\x8a\x3c\xc7\x33\x5c\x8b\x9b\x30\x81\x56\x59\x1a\x4f", 32) == 0; } else if(algo == POW(cryptonight_aeon)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xfc\xa1\x7d\x44\x37\x70\x9b\x4a\x3b\xd7\x1e\xf3\xed\x21\xb4\x17\xca\x93\xdc\x86\x79\xce\x81\xdf\xd3\xcb\xdd\xa\x22\xd7\x58\xba", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xfc\xa1\x7d\x44\x37\x70\x9b\x4a\x3b\xd7\x1e\xf3\xed\x21\xb4\x17\xca\x93\xdc\x86\x79\xce\x81\xdf\xd3\xcb\xdd\xa\x22\xd7\x58\xba", 32) == 0; } else if(algo == POW(cryptonight_ipbc)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xbc\xe7\x48\xaf\xc5\x31\xff\xc9\x33\x7f\xcf\x51\x1b\xe3\x20\xa3\xaa\x8d\x4\x55\xf9\x14\x2a\x61\xe8\x38\xdf\xdc\x3b\x28\x3e\x0xb0", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xbc\xe7\x48\xaf\xc5\x31\xff\xc9\x33\x7f\xcf\x51\x1b\xe3\x20\xa3\xaa\x8d\x4\x55\xf9\x14\x2a\x61\xe8\x38\xdf\xdc\x3b\x28\x3e\x0", 32) == 0; } else if(algo == POW(cryptonight_stellite)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xb9\x9d\x6c\xee\x50\x3c\x6f\xa6\x3f\x30\x69\x24\x4a\x0\x9f\xe4\xd4\x69\x3f\x68\x92\xa4\x5c\xc2\x51\xae\x46\x87\x7c\x6b\x98\xae", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xb9\x9d\x6c\xee\x50\x3c\x6f\xa6\x3f\x30\x69\x24\x4a\x0\x9f\xe4\xd4\x69\x3f\x68\x92\xa4\x5c\xc2\x51\xae\x46\x87\x7c\x6b\x98\xae", 32) == 0; } else if(algo == POW(cryptonight_masari)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xbf\x5f\xd\xf3\x5a\x65\x7c\x89\xb0\x41\xcf\xf0\xd\x46\x6a\xb6\x30\xf9\x77\x7f\xd9\xc6\x3\xd7\x3b\xd8\xf1\xb5\x4b\x49\xed\x28", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xbf\x5f\xd\xf3\x5a\x65\x7c\x89\xb0\x41\xcf\xf0\xd\x46\x6a\xb6\x30\xf9\x77\x7f\xd9\xc6\x3\xd7\x3b\xd8\xf1\xb5\x4b\x49\xed\x28", 32) == 0; } else if(algo == POW(cryptonight_heavy)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xf9\x44\x97\xce\xb4\xf0\xd9\x84\xb\x9b\xfc\x45\x94\x74\x55\x25\xcf\x26\x83\x16\x4f\xc\xf8\x2d\xf5\xf\x25\xff\x45\x28\x2e\x85", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xf9\x44\x97\xce\xb4\xf0\xd9\x84\xb\x9b\xfc\x45\x94\x74\x55\x25\xcf\x26\x83\x16\x4f\xc\xf8\x2d\xf5\xf\x25\xff\x45\x28\x2e\x85", 32) == 0; } else if(algo == POW(cryptonight_haven)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xc7\xd4\x52\x9\x2b\x48\xa5\xaf\xae\x11\xaf\x40\x9a\x87\xe5\x88\xf0\x29\x35\xa3\x68\xd\xe3\x6b\xce\x43\xf6\xc8\xdf\xd3\xe3\x9", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xc7\xd4\x52\x9\x2b\x48\xa5\xaf\xae\x11\xaf\x40\x9a\x87\xe5\x88\xf0\x29\x35\xa3\x68\xd\xe3\x6b\xce\x43\xf6\xc8\xdf\xd3\xe3\x9", 32) == 0; } else if(algo == POW(cryptonight_bittube2)) @@ -382,61 +412,61 @@ bool minethd::self_test() unsigned char out[32 * MAX_N]; cn_hash_fun hashf; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("\x38\x27\x4c\x97\xc4\x5a\x17\x2c\xfc\x97\x67\x98\x70\x42\x2e\x3a\x1a\xb0\x78\x49\x60\xc6\x05\x14\xd8\x16\x27\x14\x15\xc3\x06\xee\x3a\x3e\xd1\xa7\x7e\x31\xf6\xa8\x85\xc3\xcb\xff\x01\x02\x03\x04", 48, out, ctx, algo); + ctx[0]->hash_fn("\x38\x27\x4c\x97\xc4\x5a\x17\x2c\xfc\x97\x67\x98\x70\x42\x2e\x3a\x1a\xb0\x78\x49\x60\xc6\x05\x14\xd8\x16\x27\x14\x15\xc3\x06\xee\x3a\x3e\xd1\xa7\x7e\x31\xf6\xa8\x85\xc3\xcb\xff\x01\x02\x03\x04", 48, out, ctx, algo); bResult = bResult && memcmp(out, "\x18\x2c\x30\x41\x93\x1a\x14\x73\xc6\xbf\x7e\x77\xfe\xb5\x17\x9b\xa8\xbe\xa9\x68\xba\x9e\xe1\xe8\x24\x1a\x12\x7a\xac\x81\xb4\x24", 32) == 0; - hashf("\x04\x04\xb4\x94\xce\xd9\x05\x18\xe7\x25\x5d\x01\x28\x63\xde\x8a\x4d\x27\x72\xb1\xff\x78\x8c\xd0\x56\x20\x38\x98\x3e\xd6\x8c\x94\xea\x00\xfe\x43\x66\x68\x83\x00\x00\x00\x00\x18\x7c\x2e\x0f\x66\xf5\x6b\xb9\xef\x67\xed\x35\x14\x5c\x69\xd4\x69\x0d\x1f\x98\x22\x44\x01\x2b\xea\x69\x6e\xe8\xb3\x3c\x42\x12\x01", 76, out, ctx, algo); + ctx[0]->hash_fn("\x04\x04\xb4\x94\xce\xd9\x05\x18\xe7\x25\x5d\x01\x28\x63\xde\x8a\x4d\x27\x72\xb1\xff\x78\x8c\xd0\x56\x20\x38\x98\x3e\xd6\x8c\x94\xea\x00\xfe\x43\x66\x68\x83\x00\x00\x00\x00\x18\x7c\x2e\x0f\x66\xf5\x6b\xb9\xef\x67\xed\x35\x14\x5c\x69\xd4\x69\x0d\x1f\x98\x22\x44\x01\x2b\xea\x69\x6e\xe8\xb3\x3c\x42\x12\x01", 76, out, ctx, algo); bResult = bResult && memcmp(out, "\x7f\xbe\xb9\x92\x76\x87\x5a\x3c\x43\xc2\xbe\x5a\x73\x36\x06\xb5\xdc\x79\xcc\x9c\xf3\x7c\x43\x3e\xb4\x18\x56\x17\xfb\x9b\xc9\x36", 32) == 0; - hashf("\x85\x19\xe0\x39\x17\x2b\x0d\x70\xe5\xca\x7b\x33\x83\xd6\xb3\x16\x73\x15\xa4\x22\x74\x7b\x73\xf0\x19\xcf\x95\x28\xf0\xfd\xe3\x41\xfd\x0f\x2a\x63\x03\x0b\xa6\x45\x05\x25\xcf\x6d\xe3\x18\x37\x66\x9a\xf6\xf1\xdf\x81\x31\xfa\xf5\x0a\xaa\xb8\xd3\xa7\x40\x55\x89", 64, out, ctx, algo); + ctx[0]->hash_fn("\x85\x19\xe0\x39\x17\x2b\x0d\x70\xe5\xca\x7b\x33\x83\xd6\xb3\x16\x73\x15\xa4\x22\x74\x7b\x73\xf0\x19\xcf\x95\x28\xf0\xfd\xe3\x41\xfd\x0f\x2a\x63\x03\x0b\xa6\x45\x05\x25\xcf\x6d\xe3\x18\x37\x66\x9a\xf6\xf1\xdf\x81\x31\xfa\xf5\x0a\xaa\xb8\xd3\xa7\x40\x55\x89", 64, out, ctx, algo); bResult = bResult && memcmp(out, "\x90\xdc\x65\x53\x8d\xb0\x00\xea\xa2\x52\xcd\xd4\x1c\x17\x7a\x64\xfe\xff\x95\x36\xe7\x71\x68\x35\xd4\xcf\x5c\x73\x56\xb1\x2f\xcd", 32) == 0; } else if(algo == POW(cryptonight_superfast)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("\x03\x05\xa0\xdb\xd6\xbf\x05\xcf\x16\xe5\x03\xf3\xa6\x6f\x78\x00\x7c\xbf\x34\x14\x43\x32\xec\xbf\xc2\x2e\xd9\x5c\x87\x00\x38\x3b\x30\x9a\xce\x19\x23\xa0\x96\x4b\x00\x00\x00\x08\xba\x93\x9a\x62\x72\x4c\x0d\x75\x81\xfc\xe5\x76\x1e\x9d\x8a\x0e\x6a\x1c\x3f\x92\x4f\xdd\x84\x93\xd1\x11\x56\x49\xc0\x5e\xb6\x01", 76, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("\x03\x05\xa0\xdb\xd6\xbf\x05\xcf\x16\xe5\x03\xf3\xa6\x6f\x78\x00\x7c\xbf\x34\x14\x43\x32\xec\xbf\xc2\x2e\xd9\x5c\x87\x00\x38\x3b\x30\x9a\xce\x19\x23\xa0\x96\x4b\x00\x00\x00\x08\xba\x93\x9a\x62\x72\x4c\x0d\x75\x81\xfc\xe5\x76\x1e\x9d\x8a\x0e\x6a\x1c\x3f\x92\x4f\xdd\x84\x93\xd1\x11\x56\x49\xc0\x5e\xb6\x01", 76, out, ctx, algo); bResult = bResult && memcmp(out, "\x40\x86\x5a\xa8\x87\x41\xec\x1d\xcc\xbd\x2b\xc6\xff\x36\xb9\x4d\x54\x71\x58\xdb\x94\x69\x8e\x3c\xa0\x3d\xe4\x81\x9a\x65\x9f\xef", 32) == 0; } else if(algo == POW(cryptonight_gpu)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("", 0, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("", 0, out, ctx, algo); bResult = bResult && memcmp(out, "\x55\x5e\x0a\xee\x78\x79\x31\x6d\x7d\xef\xf7\x72\x97\x3c\xb9\x11\x8e\x38\x95\x70\x9d\xb2\x54\x7a\xc0\x72\xd5\xb9\x13\x10\x01\xd8", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("", 0, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("", 0, out, ctx, algo); bResult = bResult && memcmp(out, "\x55\x5e\x0a\xee\x78\x79\x31\x6d\x7d\xef\xf7\x72\x97\x3c\xb9\x11\x8e\x38\x95\x70\x9d\xb2\x54\x7a\xc0\x72\xd5\xb9\x13\x10\x01\xd8", 32) == 0; } else if(algo == POW(cryptonight_conceal)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("", 0, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("", 0, out, ctx, algo); bResult = bResult && memcmp(out, "\xb5\x54\x4b\x58\x16\x70\x26\x47\x63\x47\xe4\x1f\xb6\x5e\x57\xc9\x7c\xa5\x93\xfe\x0e\xb1\x0f\xb9\x2f\xa7\x3e\x5b\xae\xef\x79\x8c", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("", 0, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("", 0, out, ctx, algo); bResult = bResult && memcmp(out, "\xb5\x54\x4b\x58\x16\x70\x26\x47\x63\x47\xe4\x1f\xb6\x5e\x57\xc9\x7c\xa5\x93\xfe\x0e\xb1\x0f\xb9\x2f\xa7\x3e\x5b\xae\xef\x79\x8c", 32) == 0; } else if (algo == POW(cryptonight_turtle)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\x30\x5f\x66\xfe\xbb\xf3\x60\x0e\xda\xbb\x60\xf7\xf1\xc9\xb9\x0a\x3a\xe8\x5a\x31\xd4\x76\xca\x38\x1d\x56\x18\xa6\xc6\x27\x60\xd7", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); - hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\x30\x5f\x66\xfe\xbb\xf3\x60\x0e\xda\xbb\x60\xf7\xf1\xc9\xb9\x0a\x3a\xe8\x5a\x31\xd4\x76\xca\x38\x1d\x56\x18\xa6\xc6\x27\x60\xd7", 32) == 0; } else if(algo == POW(cryptonight_r)) { minethd::cn_on_new_job set_job; - func_multi_selector<1>(hashf, set_job, ::jconf::inst()->HaveHardwareAes(), false, algo); + func_multi_selector<1>(ctx, set_job, ::jconf::inst()->HaveHardwareAes(), false, algo); miner_work work; work.iBlockHeight = 1806260; set_job(work, ctx); - hashf("\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74", 44, out, ctx, algo); + ctx[0]->hash_fn("\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xf7\x59\x58\x8a\xd5\x7e\x75\x84\x67\x29\x54\x43\xa9\xbd\x71\x49\x0a\xbf\xf8\xe9\xda\xd1\xb9\x5b\x6b\xf2\xf5\xd0\xd7\x83\x87\xbc", 32) == 0; } else @@ -515,7 +545,7 @@ static std::string getAsmName(const uint32_t num_hashes) { if(cpu_model.type_name.find("Intel") != std::string::npos) asm_type = "intel_avx"; - else if(cpu_model.type_name.find("AMD") != std::string::npos && num_hashes == 1) + else if(cpu_model.type_name.find("AMD") != std::string::npos) asm_type = "amd_avx"; } } @@ -523,7 +553,7 @@ static std::string getAsmName(const uint32_t num_hashes) } template -void minethd::func_multi_selector(minethd::cn_hash_fun& hash_fun, minethd::cn_on_new_job& on_new_job, +void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job& on_new_job, bool bHaveAes, bool bNoPrefetch, const xmrstak_algo& algo, const std::string& asm_version_str) { static_assert(N >= 1, "number of threads must be >= 1" ); @@ -666,10 +696,10 @@ void minethd::func_multi_selector(minethd::cn_hash_fun& hash_fun, minethd::cn_on digit.set(0, !bHaveAes); digit.set(1, !bNoPrefetch); - hash_fun = func_table[ algv << 2 | digit.to_ulong() ]; + ctx[0]->hash_fn = func_table[ algv << 2 | digit.to_ulong() ]; // check for asm optimized version for cryptonight_v8 - if(N <= 2 && algo == cryptonight_monero_v8 && bHaveAes && algo.Mem() == CN_MEMORY && algo.Iter() == CN_ITER) + if(algo == cryptonight_monero_v8) { std::string selected_asm = asm_version_str; if(selected_asm == "auto") @@ -677,26 +707,26 @@ void minethd::func_multi_selector(minethd::cn_hash_fun& hash_fun, minethd::cn_on if(selected_asm != "off") { - if(selected_asm == "intel_avx") - { - // Intel Ivy Bridge (Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx) - if(N == 1) - hash_fun = Cryptonight_hash_asm<1u, 0u>::template hash; - else if(N == 2) - hash_fun = Cryptonight_hash_asm<2u, 0u>::template hash; - } - // supports only 1 thread per hash - if(N == 1 && selected_asm == "amd_avx") - { - // AMD Ryzen (1xxx and 2xxx series) - hash_fun = Cryptonight_hash_asm<1u, 1u>::template hash; - } + patchAsmVariants(selected_asm, ctx, algo); + if(asm_version_str == "auto" && (selected_asm != "intel_avx" || selected_asm != "amd_avx")) printer::inst()->print_msg(L3, "Switch to assembler version for '%s' cpu's", selected_asm.c_str()); else if(selected_asm != "intel_avx" && selected_asm != "amd_avx") // unknown asm type printer::inst()->print_msg(L1, "Assembler '%s' unknown, fallback to non asm version of cryptonight_v8", selected_asm.c_str()); } } + else if(algo == cryptonight_r && asm_version_str != "off") + { + std::string selected_asm = asm_version_str; + if(selected_asm == "auto") + selected_asm = cpu::getAsmName(N); + printer::inst()->print_msg(L0, "enable cryptonight_r asm '%s' cpu's", selected_asm.c_str()); + for(int h = 0; h < N; ++h) + ctx[h]->asm_version = selected_asm == "intel_avx" ? 1 : 2; // 1 == Intel; 2 == AMD + } + + for(int h = 1; h < N; ++h) + ctx[h]->hash_fn = ctx[0]->hash_fn; static const std::unordered_map on_new_job_map = { {cryptonight_r, Cryptonight_R_generator::template cn_on_new_job}, @@ -709,12 +739,10 @@ void minethd::func_multi_selector(minethd::cn_hash_fun& hash_fun, minethd::cn_on on_new_job = nullptr; } -minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, const xmrstak_algo& algo) +void minethd::func_selector(cryptonight_ctx** ctx, bool bHaveAes, bool bNoPrefetch, const xmrstak_algo& algo) { - minethd::cn_hash_fun fun; minethd::cn_on_new_job dm; - func_multi_selector<1>(fun, dm, bHaveAes, bNoPrefetch, algo); - return fun; + func_multi_selector<1>(ctx, dm, bHaveAes, bNoPrefetch, algo); // for testing us eauto, must be removed before the release } void minethd::work_main() @@ -794,12 +822,11 @@ void minethd::multiway_work_main() // start with root algorithm and switch later if fork version is reached auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot(); - cn_hash_fun hash_fun_multi; cn_on_new_job on_new_job; uint8_t version = 0; size_t lastPoolId = 0; - func_multi_selector(hash_fun_multi, on_new_job, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo, asm_version_str); + func_multi_selector(ctx, on_new_job, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo, asm_version_str); while (bQuit == 0) { if (oWork.bStall) @@ -831,12 +858,12 @@ void minethd::multiway_work_main() if(new_version >= coinDesc.GetMiningForkVersion()) { miner_algo = coinDesc.GetMiningAlgo(); - func_multi_selector(hash_fun_multi, on_new_job, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo, asm_version_str); + func_multi_selector(ctx, on_new_job, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo, asm_version_str); } else { miner_algo = coinDesc.GetMiningAlgoRoot(); - func_multi_selector(hash_fun_multi, on_new_job, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo, asm_version_str); + func_multi_selector(ctx, on_new_job, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo, asm_version_str); } lastPoolId = oWork.iPoolId; version = new_version; @@ -867,7 +894,7 @@ void minethd::multiway_work_main() for (size_t i = 0; i < N; i++) *piNonce[i] = iNonce++; - hash_fun_multi(bWorkBlob, oWork.iWorkSize, bHashOut, ctx, miner_algo); + ctx[0]->hash_fn(bWorkBlob, oWork.iWorkSize, bHashOut, ctx, miner_algo); for (size_t i = 0; i < N; i++) { diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp index ca89e5b52..2d128a080 100644 --- a/xmrstak/backend/cpu/minethd.hpp +++ b/xmrstak/backend/cpu/minethd.hpp @@ -23,19 +23,18 @@ class minethd : public iBackend static bool self_test(); typedef void (*cn_on_new_job)(const miner_work&, cryptonight_ctx**); - typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**, const xmrstak_algo&); - static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, const xmrstak_algo& algo); + static void func_selector(cryptonight_ctx**, bool bHaveAes, bool bNoPrefetch, const xmrstak_algo& algo); static bool thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id); static cryptonight_ctx* minethd_alloc_ctx(); template - static void func_multi_selector(minethd::cn_hash_fun& hash_fun, minethd::cn_on_new_job& on_new_job, + static void func_multi_selector(cryptonight_ctx**, minethd::cn_on_new_job& on_new_job, bool bHaveAes, bool bNoPrefetch, const xmrstak_algo& algo, const std::string& asm_version_str = "off"); private: - + minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, int64_t affinity, const std::string& asm_version); template diff --git a/xmrstak/backend/cryptonight.hpp b/xmrstak/backend/cryptonight.hpp index 00311bb93..addf8ea48 100644 --- a/xmrstak/backend/cryptonight.hpp +++ b/xmrstak/backend/cryptonight.hpp @@ -166,6 +166,7 @@ struct xmrstak_algo // default cryptonight constexpr size_t CN_MEMORY = 2 * 1024 * 1024; constexpr uint32_t CN_ITER = 0x80000; +constexpr uint32_t CN_MASK = ((CN_MEMORY - 1) / 16) * 16; // crptonight gpu constexpr uint32_t CN_GPU_MASK = 0x1FFFC0; diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index 794e68d11..a50dd30cc 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -104,28 +104,7 @@ void minethd::start_mining() bool minethd::self_test() { - cryptonight_ctx* ctx0; - unsigned char out[32]; - bool bResult = true; - - ctx0 = new cryptonight_ctx; - if(::jconf::inst()->HaveHardwareAes()) - { - //cryptonight_hash_ctx("This is a test", 14, out, ctx0); - bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; - } - else - { - //cryptonight_hash_ctx_soft("This is a test", 14, out, ctx0); - bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; - } - delete ctx0; - - //if(!bResult) - // printer::inst()->print_msg(L0, - // "Cryptonight hash self-test failed. This might be caused by bad compiler optimizations."); - - return bResult; + return true; } @@ -229,9 +208,8 @@ void minethd::work_main() // start with root algorithm and switch later if fork version is reached auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot(); - cn_hash_fun hash_fun; cpu::minethd::cn_on_new_job set_job; - cpu::minethd::func_multi_selector<1>(hash_fun, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); + cpu::minethd::func_multi_selector<1>(&cpu_ctx, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); uint32_t iNonce; @@ -260,12 +238,12 @@ void minethd::work_main() if(new_version >= coinDesc.GetMiningForkVersion()) { miner_algo = coinDesc.GetMiningAlgo(); - cpu::minethd::func_multi_selector<1>(hash_fun, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); + cpu::minethd::func_multi_selector<1>(&cpu_ctx, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); } else { miner_algo = coinDesc.GetMiningAlgoRoot(); - cpu::minethd::func_multi_selector<1>(hash_fun, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); + cpu::minethd::func_multi_selector<1>(&cpu_ctx, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo); } lastPoolId = oWork.iPoolId; version = new_version; @@ -315,7 +293,7 @@ void minethd::work_main() *(uint32_t*)(bWorkBlob + 39) = foundNonce[i]; - hash_fun(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx, miner_algo); + cpu_ctx->hash_fn(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx, miner_algo); if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget) executor::inst()->push_event(ex_event(job_result(oWork.sJobID, foundNonce[i], bResult, iThreadNo, miner_algo), oWork.iPoolId)); else From cd36058edea2d27198a5dfcbdde152ddd7459c0a Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Thu, 7 Mar 2019 20:31:27 +0100 Subject: [PATCH 6/9] fix CUDA compile - fix linker issues with CUDA8 - fix device selection --- .../cpu/crypto/asm/cnR/CryptonightR_template.inc | 12 +++++++----- .../cpu/crypto/asm/cnR/CryptonightWOW_template.inc | 12 +++++++----- xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp | 1 - xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu | 6 +++--- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.inc b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.inc index b54486a57..e4e52f038 100644 --- a/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.inc +++ b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.inc @@ -60,8 +60,9 @@ FN_PREFIX(CryptonightR_template_part1): mov edi, [rdx+104] mov ebp, [rdx+108] - ALIGN(64) +ALIGN(64) FN_PREFIX(CryptonightR_template_mainloop): +FN_PREFIX(CryptonightR_template_mainloop2): movdqa xmm5, XMMWORD PTR [r9+r11] movq xmm0, r15 movq xmm4, rsp @@ -156,7 +157,7 @@ FN_PREFIX(CryptonightR_template_part2): xor r15, r14 movdqa xmm6, xmm5 dec r8d - jnz FN_PREFIX(CryptonightR_template_mainloop) + jnz FN_PREFIX(CryptonightR_template_mainloop2) FN_PREFIX(CryptonightR_template_part3): movq rsp, xmm9 @@ -252,12 +253,13 @@ FN_PREFIX(CryptonightR_template_double_part1): mov r11d, 524288 movq xmm10, rax punpcklqdq xmm10, xmm0 - + movq xmm14, QWORD PTR [rsp+128] movq xmm15, QWORD PTR [rsp+136] - ALIGN(64) +ALIGN(64) FN_PREFIX(CryptonightR_template_double_mainloop): +FN_PREFIX(CryptonightR_template_double_mainloop2): movdqu xmm6, XMMWORD PTR [rbx+rsi] movq xmm0, r12 mov ecx, ebx @@ -502,7 +504,7 @@ FN_PREFIX(CryptonightR_template_double_part3): xor r13, r9 and r8d, 2097136 dec r11d - jnz FN_PREFIX(CryptonightR_template_double_mainloop) + jnz FN_PREFIX(CryptonightR_template_double_mainloop2) FN_PREFIX(CryptonightR_template_double_part4): diff --git a/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightWOW_template.inc b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightWOW_template.inc index 7183a659f..e2b5dd347 100644 --- a/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightWOW_template.inc +++ b/xmrstak/backend/cpu/crypto/asm/cnR/CryptonightWOW_template.inc @@ -60,8 +60,9 @@ FN_PREFIX(CryptonightWOW_template_part1): mov edi, [rdx+104] mov ebp, [rdx+108] - ALIGN(64) +ALIGN(64) FN_PREFIX(CryptonightWOW_template_mainloop): +FN_PREFIX(CryptonightWOW_template_mainloop2): movdqa xmm5, XMMWORD PTR [r9+r11] movq xmm0, r15 movq xmm4, rsp @@ -140,7 +141,7 @@ FN_PREFIX(CryptonightWOW_template_part2): xor r15, r14 movdqa xmm6, xmm5 dec r8d - jnz FN_PREFIX(CryptonightWOW_template_mainloop) + jnz FN_PREFIX(CryptonightWOW_template_mainloop2) FN_PREFIX(CryptonightWOW_template_part3): movq rsp, xmm9 @@ -236,12 +237,13 @@ FN_PREFIX(CryptonightWOW_template_double_part1): mov r11d, 524288 movq xmm10, rax punpcklqdq xmm10, xmm0 - + movq xmm14, QWORD PTR [rsp+128] movq xmm15, QWORD PTR [rsp+136] - ALIGN(64) +ALIGN(64) FN_PREFIX(CryptonightWOW_template_double_mainloop): +FN_PREFIX(CryptonightWOW_template_double_mainloop2): movdqu xmm6, XMMWORD PTR [rbx+rsi] movq xmm0, r12 mov ecx, ebx @@ -459,7 +461,7 @@ FN_PREFIX(CryptonightWOW_template_double_part3): xor r13, r9 and r8d, 2097136 dec r11d - jnz FN_PREFIX(CryptonightWOW_template_double_mainloop) + jnz FN_PREFIX(CryptonightWOW_template_double_mainloop2) FN_PREFIX(CryptonightWOW_template_double_part4): diff --git a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp index fe77b6f81..906701893 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp +++ b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp @@ -36,7 +36,6 @@ typedef struct { size_t free_device_memory; size_t total_device_memory; - CUdevice cuDevice; CUcontext cuContext; CUmodule module = nullptr; CUfunction kernel = nullptr; diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index 7a9ccddc2..32d6eeadb 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -267,9 +267,6 @@ extern "C" void cryptonight_extra_cpu_set_data( nvid_ctx* ctx, const void *data, extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) { - CU_CHECK(ctx->device_id, cuDeviceGet(&ctx->cuDevice, ctx->device_id)); - CU_CHECK(ctx->device_id, cuCtxCreate(&ctx->cuContext, 0, ctx->cuDevice)); - cudaError_t err; err = cudaSetDevice(ctx->device_id); if(err != cudaSuccess) @@ -309,6 +306,9 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) size_t wsize = ctx->device_blocks * ctx->device_threads; CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize)); + // get the cudaRT context + CU_CHECK(ctx->device_id, cuCtxGetCurrent(&ctx->cuContext)); + size_t ctx_b_size = 4 * sizeof(uint32_t) * wsize; if( std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_heavy) != neededAlgorithms.end() || From 2d9087c74b562117719944f8865c0f57cc9fb1e6 Mon Sep 17 00:00:00 2001 From: EDDragonWolf Date: Thu, 7 Mar 2019 21:50:57 +0100 Subject: [PATCH 7/9] Support of CryptoNight v8 ReverseWaltz rebased version of #2261 Added support of CryptoNight v8 Reverse Waltz (named cryptonight_v8_reversewaltz here) - equal to CryptoNight v8 but with 3/4 iterations of CryptoNight v8 and with reversed shuffle operation We plan to use CryptoNight v8 Reverse Waltz as new PoW algorithm for Graft (graft-project/GraftNetwork#234). --- README.md | 1 + xmrstak/backend/amd/amd_gpu/gpu.cpp | 2 +- .../backend/amd/amd_gpu/opencl/cryptonight.cl | 33 +++++++++---- .../amd/amd_gpu/opencl/fast_int_math_v2.cl | 2 +- .../backend/cpu/crypto/cryptonight_aesni.h | 31 ++++++++++-- xmrstak/backend/cpu/minethd.cpp | 20 +++++++- xmrstak/backend/cpu/minethd.hpp | 2 +- xmrstak/backend/cryptonight.hpp | 13 +++-- xmrstak/backend/nvidia/nvcc_code/cuda_core.cu | 47 ++++++++++++++++--- .../backend/nvidia/nvcc_code/cuda_extra.cu | 13 +++-- xmrstak/jconf.cpp | 3 +- xmrstak/pools.tpl | 1 + 12 files changed, 135 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index ff87dcead..a7eee6041 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,7 @@ If your prefered coin is not listed, you can choose one of the following algorit - cryptonight_v7_stellite - cryptonight_v8 - cryptonight_v8_half (used by masari and stellite) + - cryptonight_v8_reversewaltz (used by graft) - cryptonight_v8_zelerius - 4MiB scratchpad memory - cryptonight_haven diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 1b016e84f..ace1c34bb 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -294,7 +294,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ * this is required if the dev pool is mining monero * but the user tuned there settings for another currency */ - if(miner_algo == cryptonight_monero_v8) + if(miner_algo == cryptonight_monero_v8 || miner_algo == cryptonight_v8_reversewaltz) { if(ctx->memChunk < 2) mem_chunk_exp = 1u << 2; diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index 2ca09c31c..b78f2bcf7 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -30,6 +30,7 @@ R"===( #define cryptonight_superfast 12 #define cryptonight_gpu 13 #define cryptonight_conceal 14 +#define cryptonight_v8_reversewaltz 17 /* For Mesa clover support */ #ifdef cl_clang_storage_class_specifiers @@ -639,7 +640,7 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad, R"===( // __NV_CL_C_VERSION checks if NVIDIA opencl is used -#if(ALGO == cryptonight_monero_v8 && defined(__NV_CL_C_VERSION)) +#if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && defined(__NV_CL_C_VERSION)) # define SCRATCHPAD_CHUNK(N) (*(__local uint4*)((__local uchar*)(scratchpad_line) + (idxS ^ (N << 4)))) # define SCRATCHPAD_CHUNK_GLOBAL (*((__global uint16*)(Scratchpad + (IDX((idx0 & 0x1FFFC0U) >> 4))))) #else @@ -659,7 +660,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states float4 conc_var = (float4)(0.0f); #endif -#if(ALGO == cryptonight_monero_v8) +#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) ulong b[4]; uint4 b_x[2]; // NVIDIA @@ -673,7 +674,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states #endif __local uint AES0[256], AES1[256]; -#if(ALGO == cryptonight_monero_v8) +#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) # if defined(__clang__) && !defined(__NV_CL_C_VERSION) __local uint RCP[256]; # endif @@ -689,7 +690,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states AES0[i] = tmp; AES1[i] = rotate(tmp, 8U); -#if(ALGO == cryptonight_monero_v8 && (defined(__clang__) && !defined(__NV_CL_C_VERSION))) +#if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && (defined(__clang__) && !defined(__NV_CL_C_VERSION))) RCP[i] = RCP_C[i]; #endif } @@ -723,7 +724,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states b_x[0] = ((uint4 *)b)[0]; -#if(ALGO == cryptonight_monero_v8) +#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) a[1] = states[1] ^ states[5]; b[2] = states[8] ^ states[10]; b[3] = states[9] ^ states[11]; @@ -755,7 +756,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states { ulong c[2]; -#if(ALGO == cryptonight_monero_v8 && defined(__NV_CL_C_VERSION)) +#if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && defined(__NV_CL_C_VERSION)) uint idxS = idx0 & 0x30U; *scratchpad_line = SCRATCHPAD_CHUNK_GLOBAL; #endif @@ -792,6 +793,15 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + ((ulong2 *)b_x)[0]); SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]); } +#elif(ALGO == cryptonight_v8_reversewaltz) + { + ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(1)); + ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2)); + ulong2 chunk1 = as_ulong2(SCRATCHPAD_CHUNK(3)); + SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + ((ulong2 *)(b_x + 1))[0]); + SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + ((ulong2 *)b_x)[0]); + SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]); + } #endif #if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) @@ -807,7 +817,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states SCRATCHPAD_CHUNK(0) = b_x[0]; idx0 = as_uint2(c[0]).s0 & MASK; -#elif(ALGO == cryptonight_monero_v8) +#elif(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) SCRATCHPAD_CHUNK(0) = b_x[0] ^ ((uint4 *)c)[0]; # ifdef __NV_CL_C_VERSION // flush shuffled data @@ -826,7 +836,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states uint4 tmp; tmp = SCRATCHPAD_CHUNK(0); -#if(ALGO == cryptonight_monero_v8) +#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) // Use division and square root results from the _previous_ iteration to hide the latency tmp.s0 ^= division_result.s0; tmp.s1 ^= division_result.s1 ^ sqrt_result; @@ -853,8 +863,13 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2)); result_mul ^= chunk2; ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(3)); +#if(ALGO == cryptonight_v8_reversewaltz) + SCRATCHPAD_CHUNK(1) = as_uint4(chunk1 + ((ulong2 *)(b_x + 1))[0]); + SCRATCHPAD_CHUNK(2) = as_uint4(chunk3 + ((ulong2 *)b_x)[0]); +#else SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + ((ulong2 *)(b_x + 1))[0]); SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + ((ulong2 *)b_x)[0]); +#endif SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]); a[0] += result_mul.s0; a[1] += result_mul.s1; @@ -882,7 +897,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states ((uint4 *)a)[0] ^= tmp; -#if (ALGO == cryptonight_monero_v8) +#if (ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) # if defined(__NV_CL_C_VERSION) // flush shuffled data SCRATCHPAD_CHUNK_GLOBAL = *scratchpad_line; diff --git a/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl b/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl index 8878db618..2b34b761c 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl @@ -3,7 +3,7 @@ R"===( * @author SChernykh */ -#if(ALGO == cryptonight_monero_v8) +#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) static const __constant uint RCP_C[256] = { diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index ed8ed8ace..22fd0f481 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -638,6 +638,16 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) _mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \ if (ALGO == cryptonight_r) \ cx = _mm_xor_si128(_mm_xor_si128(cx, chunk3), _mm_xor_si128(chunk1, chunk2)); \ + } \ + if(ALGO == cryptonight_v8_reversewaltz) \ + { \ + const uint64_t idx1 = idx0 & MASK; \ + const __m128i chunk3 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x10]); \ + const __m128i chunk2 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x20]); \ + const __m128i chunk1 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x30]); \ + _mm_store_si128((__m128i *)&l0[idx1 ^ 0x10], _mm_add_epi64(chunk3, bx1)); \ + _mm_store_si128((__m128i *)&l0[idx1 ^ 0x20], _mm_add_epi64(chunk1, bx0)); \ + _mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \ } #define CN_MONERO_V8_SHUFFLE_1(n, l0, idx0, ax0, bx0, bx1, lo, hi) \ @@ -653,10 +663,22 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) _mm_store_si128((__m128i *)&l0[idx1 ^ 0x10], _mm_add_epi64(chunk3, bx1)); \ _mm_store_si128((__m128i *)&l0[idx1 ^ 0x20], _mm_add_epi64(chunk1, bx0)); \ _mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \ + } \ + if(ALGO == cryptonight_v8_reversewaltz) \ + { \ + const uint64_t idx1 = idx0 & MASK; \ + const __m128i chunk3 = _mm_xor_si128(_mm_load_si128((__m128i *)&l0[idx1 ^ 0x10]), _mm_set_epi64x(lo, hi)); \ + const __m128i chunk2 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x20]); \ + hi ^= ((uint64_t*)&chunk2)[0]; \ + lo ^= ((uint64_t*)&chunk2)[1]; \ + const __m128i chunk1 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x30]); \ + _mm_store_si128((__m128i *)&l0[idx1 ^ 0x10], _mm_add_epi64(chunk3, bx1)); \ + _mm_store_si128((__m128i *)&l0[idx1 ^ 0x20], _mm_add_epi64(chunk1, bx0)); \ + _mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \ } #define CN_MONERO_V8_DIV(n, cx, sqrt_result, division_result_xmm, cl) \ - if(ALGO == cryptonight_monero_v8) \ + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) \ { \ uint64_t sqrt_result_tmp; \ assign(sqrt_result_tmp, sqrt_result); \ @@ -735,7 +757,7 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) idx0 = h0[0] ^ h0[4]; \ ax0 = _mm_set_epi64x(h0[1] ^ h0[5], idx0); \ bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); \ - if(ALGO == cryptonight_monero_v8) \ + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) \ { \ bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); \ division_result_xmm = _mm_cvtsi64_si128(h0[12]); \ @@ -782,7 +804,7 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) ptr0 = (__m128i *)&l0[idx0 & MASK]; \ if(PREFETCH) \ _mm_prefetch((const char*)ptr0, _MM_HINT_T0); \ - if(ALGO != cryptonight_monero_v8 && ALGO != cryptonight_r && ALGO != cryptonight_r_wow) \ + if(ALGO != cryptonight_monero_v8 && ALGO != cryptonight_r && ALGO != cryptonight_r_wow && ALGO != cryptonight_v8_reversewaltz) \ bx0 = cx #define CN_STEP3(n, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm, cn_r_data) \ @@ -807,7 +829,7 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) ah0 += lo; \ al0 += hi; \ } \ - if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_r || ALGO == cryptonight_r_wow) \ + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_r || ALGO == cryptonight_r_wow || ALGO == cryptonight_v8_reversewaltz) \ { \ bx1 = bx0; \ bx0 = cx; \ @@ -1081,7 +1103,6 @@ struct Cryptonight_hash_asm keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200); cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state, algo); } - if(ALGO == cryptonight_r) { // API ATTRIBUTE is only required for cryptonight_r diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 8d67a73d1..2b8b0e18d 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -469,6 +469,16 @@ bool minethd::self_test() ctx[0]->hash_fn("\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xf7\x59\x58\x8a\xd5\x7e\x75\x84\x67\x29\x54\x43\xa9\xbd\x71\x49\x0a\xbf\xf8\xe9\xda\xd1\xb9\x5b\x6b\xf2\xf5\xd0\xd7\x83\x87\xbc", 32) == 0; } + else if(algo == POW(cryptonight_v8_reversewaltz)) + { + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); + bResult = memcmp(out, "\x32\xf7\x36\xec\x1d\x2f\x3f\xc5\x4c\x49\xbe\xb8\xa0\x47\x6c\xbf\xdd\x14\xc3\x51\xb9\xc6\xd7\x2c\x6f\x9f\xfc\xb5\x87\x5b\xe6\xb3", 32) == 0; + + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); + bResult &= memcmp(out, "\x32\xf7\x36\xec\x1d\x2f\x3f\xc5\x4c\x49\xbe\xb8\xa0\x47\x6c\xbf\xdd\x14\xc3\x51\xb9\xc6\xd7\x2c\x6f\x9f\xfc\xb5\x87\x5b\xe6\xb3", 32) == 0; + } else printer::inst()->print_msg(L0, "Cryptonight hash self-test NOT defined for POW %s", algo.Name().c_str()); @@ -610,6 +620,9 @@ void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job& case cryptonight_r: algv = 14; break; + case cryptonight_v8_reversewaltz: + algv = 15; + break; default: algv = 2; break; @@ -689,7 +702,12 @@ void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job& Cryptonight_hash::template hash, Cryptonight_hash::template hash, Cryptonight_hash::template hash, - Cryptonight_hash::template hash + Cryptonight_hash::template hash, + + Cryptonight_hash::template hash, + Cryptonight_hash::template hash, + Cryptonight_hash::template hash, + Cryptonight_hash::template hash }; std::bitset<2> digit; diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp index 2d128a080..1e25f5d4f 100644 --- a/xmrstak/backend/cpu/minethd.hpp +++ b/xmrstak/backend/cpu/minethd.hpp @@ -34,7 +34,7 @@ class minethd : public iBackend bool bHaveAes, bool bNoPrefetch, const xmrstak_algo& algo, const std::string& asm_version_str = "off"); private: - + minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, int64_t affinity, const std::string& asm_version); template diff --git a/xmrstak/backend/cryptonight.hpp b/xmrstak/backend/cryptonight.hpp index addf8ea48..4f5d88dea 100644 --- a/xmrstak/backend/cryptonight.hpp +++ b/xmrstak/backend/cryptonight.hpp @@ -26,6 +26,7 @@ enum xmrstak_algo_id cryptonight_conceal = 14, cryptonight_r_wow = 15, cryptonight_r = 16, + cryptonight_v8_reversewaltz = 17, //equal to cryptonight_monero_v8 but with 3/4 iterations and reversed shuffle operation cryptonight_turtle = start_derived_algo_id, cryptonight_v8_half = (start_derived_algo_id + 1), @@ -39,7 +40,7 @@ enum xmrstak_algo_id */ inline std::string get_algo_name(xmrstak_algo_id algo_id) { - static std::array base_algo_names = + static std::array base_algo_names = {{ "invalid_algo", "cryptonight", @@ -57,7 +58,8 @@ inline std::string get_algo_name(xmrstak_algo_id algo_id) "cryptonight_gpu", "cryptonight_conceal", "cryptonight_r_wow", - "cryptonight_r" + "cryptonight_r", + "cryptonight_v8_reversewaltz" // used by graft }}; static std::array derived_algo_names = @@ -177,9 +179,11 @@ constexpr uint32_t CN_TURTLE_MASK = 0x1FFF0; constexpr uint32_t CN_ZELERIUS_ITER = 0x60000; +constexpr uint32_t CN_WALTZ_ITER = 0x60000; + inline xmrstak_algo POW(xmrstak_algo_id algo_id) { - static std::array pow = {{ + static std::array pow = {{ {invalid_algo, invalid_algo}, {cryptonight, cryptonight, CN_ITER, CN_MEMORY}, {cryptonight_lite, cryptonight_lite, CN_ITER/2, CN_MEMORY/2}, @@ -196,7 +200,8 @@ inline xmrstak_algo POW(xmrstak_algo_id algo_id) {cryptonight_gpu, cryptonight_gpu, CN_GPU_ITER, CN_MEMORY, CN_GPU_MASK}, {cryptonight_conceal, cryptonight_conceal, CN_ITER/2, CN_MEMORY}, {cryptonight_r_wow, cryptonight_r_wow, CN_ITER, CN_MEMORY}, - {cryptonight_r, cryptonight_r, CN_ITER, CN_MEMORY} + {cryptonight_r, cryptonight_r, CN_ITER, CN_MEMORY}, + {cryptonight_v8_reversewaltz, cryptonight_v8_reversewaltz, CN_WALTZ_ITER, CN_MEMORY} }}; static std::array derived_pow = diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index d082f3362..3c62bd090 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -315,7 +315,7 @@ __global__ void cryptonight_core_gpu_phase2_double( uint64_t bx1; uint32_t sqrt_result; uint64_t division_result; - if(ALGO == cryptonight_monero_v8) + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) { bx0 = ((uint64_t*)(d_ctx_b + thread * 16))[sub]; bx1 = ((uint64_t*)(d_ctx_b + thread * 16 + 4))[sub]; @@ -370,6 +370,21 @@ __global__ void cryptonight_core_gpu_phase2_double( myChunks[ idx1 ^ 4 + sub ] = chunk1 + bx0; myChunks[ idx1 ^ 6 + sub ] = chunk2 + ax0; } + else if(ALGO == cryptonight_v8_reversewaltz) + { + + const uint64_t chunk3 = myChunks[ idx1 ^ 2 + sub ]; + const uint64_t chunk2 = myChunks[ idx1 ^ 4 + sub ]; + const uint64_t chunk1 = myChunks[ idx1 ^ 6 + sub ]; +#if (__CUDACC_VER_MAJOR__ >= 9) + __syncwarp(); +#else + __syncthreads( ); +#endif + myChunks[ idx1 ^ 2 + sub ] = chunk3 + bx1; + myChunks[ idx1 ^ 4 + sub ] = chunk1 + bx0; + myChunks[ idx1 ^ 6 + sub ] = chunk2 + ax0; + } myChunks[ idx1 + sub ] = cx_aes ^ bx0; if(MEM_MODE == 0) @@ -398,14 +413,14 @@ __global__ void cryptonight_core_gpu_phase2_double( else ((ulonglong4*)myChunks)[sub] = ((ulonglong4*)ptr0)[sub]; - if(ALGO != cryptonight_monero_v8) + if(ALGO != cryptonight_monero_v8 && ALGO != cryptonight_v8_reversewaltz) bx0 = cx_aes; uint64_t cx_mul; ((uint32_t*)&cx_mul)[0] = shuffle<2>(sPtr, sub, cx_aes.x , 0); ((uint32_t*)&cx_mul)[1] = shuffle<2>(sPtr, sub, cx_aes.y , 0); - if((ALGO == cryptonight_monero_v8) && sub == 1) + if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && sub == 1) { // Use division and square root results from the _previous_ iteration to hide the latency ((uint32_t*)&division_result)[1] ^= sqrt_result; @@ -439,6 +454,21 @@ __global__ void cryptonight_core_gpu_phase2_double( __syncwarp(); #else __syncthreads( ); +#endif + myChunks[ idx1 ^ 2 + sub ] = chunk3 + bx1; + myChunks[ idx1 ^ 4 + sub ] = chunk1 + bx0; + myChunks[ idx1 ^ 6 + sub ] = chunk2 + ax0; + } + if(ALGO == cryptonight_v8_reversewaltz) + { + const uint64_t chunk3 = myChunks[ idx1 ^ 2 + sub ] ^ res; + uint64_t chunk2 = myChunks[ idx1 ^ 4 + sub ]; + res ^= ((uint64_t*)&chunk2)[0]; + const uint64_t chunk1 = myChunks[ idx1 ^ 6 + sub ]; +#if (__CUDACC_VER_MAJOR__ >= 9) + __syncwarp(); +#else + __syncthreads( ); #endif myChunks[ idx1 ^ 2 + sub ] = chunk3 + bx1; myChunks[ idx1 ^ 4 + sub ] = chunk1 + bx0; @@ -446,7 +476,7 @@ __global__ void cryptonight_core_gpu_phase2_double( } ax0 += res; } - if(ALGO == cryptonight_monero_v8) + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) { bx1 = bx0; bx0 = cx_aes; @@ -469,7 +499,7 @@ __global__ void cryptonight_core_gpu_phase2_double( if ( bfactor > 0 ) { ((uint64_t*)(d_ctx_a + thread * 4))[sub] = ax0; - if(ALGO == cryptonight_monero_v8) + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) { ((uint64_t*)(d_ctx_b + thread * 16))[sub] = bx0; ((uint64_t*)(d_ctx_b + thread * 16 + 4))[sub] = bx1; @@ -815,7 +845,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce, const xmrstak_algo for ( int i = 0; i < partcount; i++ ) { - if(ALGO == cryptonight_monero_v8) + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) { // two threads per block CUDA_CHECK_MSG_KERNEL( @@ -1068,7 +1098,10 @@ void cryptonight_core_cpu_hash(nvid_ctx* ctx, const xmrstak_algo& miner_algo, ui cryptonight_core_gpu_hash, cryptonight_core_gpu_hash, - cryptonight_core_gpu_hash + cryptonight_core_gpu_hash, + + cryptonight_core_gpu_hash, + cryptonight_core_gpu_hash }; std::bitset<1> digit; diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index 32d6eeadb..e909e2fa3 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -127,7 +127,7 @@ __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restric XOR_BLOCKS_DST( ctx_state, ctx_state + 8, ctx_a ); XOR_BLOCKS_DST( ctx_state + 4, ctx_state + 12, ctx_b ); memcpy( d_ctx_a + thread * 4, ctx_a, 4 * 4 ); - if(ALGO == cryptonight_monero_v8) + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) { memcpy( d_ctx_b + thread * 16, ctx_b, 4 * 4 ); // bx1 @@ -326,7 +326,8 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) { ctx_b_size += sizeof(uint32_t) * 4 * wsize; } - else if(std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_monero_v8) != neededAlgorithms.end()) + else if((std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_monero_v8) != neededAlgorithms.end()) + || (std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_v8_reversewaltz) != neededAlgorithms.end())) { // bx0 (16byte), bx1 (16byte), division_result (8byte) and sqrt_result (8byte), padding (16byte) ctx_b_size = 4 * 4 * sizeof(uint32_t) * wsize; @@ -406,6 +407,11 @@ extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>( wsize, ctx->d_input, ctx->inputlen, startNonce, ctx->d_ctx_state,ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 )); } + else if(miner_algo == cryptonight_v8_reversewaltz) + { + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>( wsize, ctx->d_input, ctx->inputlen, startNonce, + ctx->d_ctx_state,ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 )); + } else { /* pass two times d_ctx_state because the second state is used later in phase1, @@ -776,9 +782,10 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) bool useCryptonight_v8 = (std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_monero_v8) != neededAlgorithms.end()); bool useCryptonight_r = (std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_r) != neededAlgorithms.end()); bool useCryptonight_r_wow = (std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_r_wow) != neededAlgorithms.end()); + bool useCryptonight_reversewaltz = (std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_v8_reversewaltz) != neededAlgorithms.end()); // overwrite default config if cryptonight_monero_v8 is mined and GPU has at least compute capability 5.0 - if((useCryptonight_v8 || useCryptonight_r || useCryptonight_r_wow) && gpuArch >= 50) + if((useCryptonight_v8 || useCryptonight_r || useCryptonight_r_wow || useCryptonight_reversewaltz) && gpuArch >= 50) { // 4 based on my test maybe it must be adjusted later size_t threads = 4; diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index e60420234..2b22a2fb9 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -104,12 +104,13 @@ xmrstak::coin_selection coins[] = { { "cryptonight_v7", {POW(cryptonight_monero)}, {POW(cryptonight_gpu)}, nullptr }, { "cryptonight_v8", {POW(cryptonight_monero_v8)}, {POW(cryptonight_r),10,POW(cryptonight_monero_v8)}, nullptr }, { "cryptonight_v8_half", {POW(cryptonight_v8_half)}, {POW(cryptonight_gpu)}, nullptr }, + { "cryptonight_v8_reversewaltz", {POW(cryptonight_v8_reversewaltz)}, {POW(cryptonight_gpu)}, nullptr }, { "cryptonight_v8_zelerius", {POW(cryptonight_v8_zelerius)},{POW(cryptonight_gpu)}, nullptr }, { "cryptonight_v7_stellite", {POW(cryptonight_stellite)}, {POW(cryptonight_gpu)}, nullptr }, { "cryptonight_gpu", {POW(cryptonight_gpu)}, {POW(cryptonight_gpu)}, "pool.ryo-currency.com:3333" }, { "cryptonight_conceal", {POW(cryptonight_conceal)}, {POW(cryptonight_gpu)}, nullptr }, { "freehaven", {POW(cryptonight_superfast)}, {POW(cryptonight_gpu)}, nullptr }, - { "graft", {POW(cryptonight_monero_v8)}, {POW(cryptonight_gpu)}, nullptr }, + { "graft", {POW(cryptonight_v8_reversewaltz), 12, POW(cryptonight_monero_v8)}, {POW(cryptonight_gpu)}, nullptr }, { "haven", {POW(cryptonight_haven)}, {POW(cryptonight_gpu)}, nullptr }, { "lethean", {POW(cryptonight_monero)}, {POW(cryptonight_gpu)}, nullptr }, { "masari", {POW(cryptonight_v8_half)}, {POW(cryptonight_gpu)}, nullptr }, diff --git a/xmrstak/pools.tpl b/xmrstak/pools.tpl index f8f1d7d6c..891fb8d78 100644 --- a/xmrstak/pools.tpl +++ b/xmrstak/pools.tpl @@ -50,6 +50,7 @@ POOLCONF], * cryptonight_v7 * cryptonight_v8 * cryptonight_v8_half (used by masari and stellite) + * cryptonight_v8_reversewaltz (used by graft) * cryptonight_v8_zelerius * # 4MiB scratchpad memory * cryptonight_bittube2 From 968beb778dbd7792c6e01441a6121c69745dff7b Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Thu, 7 Mar 2019 22:04:12 +0100 Subject: [PATCH 8/9] increase version to 2.10.0 --- xmrstak/version.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp index bcaebf4e0..334a246bd 100644 --- a/xmrstak/version.cpp +++ b/xmrstak/version.cpp @@ -18,7 +18,7 @@ #endif #define XMR_STAK_NAME "xmr-stak" -#define XMR_STAK_VERSION "2.9.0" +#define XMR_STAK_VERSION "2.10.0" #if defined(_WIN32) #define OS_TYPE "win" From 28ae703b5f63742e053bb03b246fad0b56a1d44a Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Thu, 7 Mar 2019 22:33:30 +0100 Subject: [PATCH 9/9] dev pool change activate nicehash --- xmrstak/misc/executor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp index 24e348897..79d4731e6 100644 --- a/xmrstak/misc/executor.cpp +++ b/xmrstak/misc/executor.cpp @@ -368,7 +368,7 @@ void executor::on_pool_have_job(size_t pool_id, pool_job& oPoolJob) dat.iSavedNonce = oPoolJob.iSavedNonce; dat.pool_id = pool_id; - xmrstak::globalStates::inst().switch_work(xmrstak::miner_work(oPoolJob.sJobID, oPoolJob.bWorkBlob, + xmrstak::globalStates::inst().switch_work(xmrstak::miner_work(oPoolJob.sJobID, oPoolJob.bWorkBlob, oPoolJob.iWorkLen, oPoolJob.iTarget, pool->is_nicehash(), pool_id, oPoolJob.iBlockHeight), dat); if(dat.pool_id != pool_id) @@ -584,9 +584,9 @@ void executor::ex_main() break; default: if(dev_tls) - pools.emplace_front(0, "donate.xmr-stak.net:6666", "", "", "", 0.0, true, true, "", false); + pools.emplace_front(0, "donate.xmr-stak.net:6666", "", "", "", 0.0, true, true, "", true); else - pools.emplace_front(0, "donate.xmr-stak.net:3333", "", "", "", 0.0, true, false, "", false); + pools.emplace_front(0, "donate.xmr-stak.net:3333", "", "", "", 0.0, true, false, "", true); break; }