diff --git a/.appveyor.yml b/.appveyor.yml
index c336842d5..fee8f6524 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -12,7 +12,7 @@ install:
- curl -sL https://github.com/fireice-uk/xmr-stak-dep/releases/download/v1/xmr-stak-dep.zip -o xmr-stak-dep.zip
- 7z x xmr-stak-dep.zip -o"c:\xmr-stak-dep" -y > nul
- appveyor DownloadFile https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda_8.0.44_windows-exe -FileName cuda_8.0.44_windows.exe
- - cuda_8.0.44_windows.exe -s compiler_8.0 cudart_8.0
+ - cuda_8.0.44_windows.exe -s compiler_8.0 cudart_8.0 nvrtc_8.0 nvrtc_dev_8.0
- set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\libnvvp;%PATH%
- nvcc -V
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b714ee0ce..a5c06df8a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,6 +13,13 @@ endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
# help to find cuda on systems with a software module system
list(APPEND CMAKE_PREFIX_PATH "$ENV{CUDA_ROOT}")
+
+# help to find AMD OCL SDK Light (replaced APP SDK)
+list(APPEND CMAKE_PREFIX_PATH "$ENV{OCL_ROOT}")
+
+# help to find AMD app SDK on systems with a software module system
+list(APPEND CMAKE_PREFIX_PATH "$ENV{AMDAPPSDKROOT}")
+
# allow user to extent CMAKE_PREFIX_PATH via environment variable
list(APPEND CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH}")
@@ -63,6 +70,42 @@ if(CUDA_ENABLE)
find_package(CUDA 7.5)
if(CUDA_FOUND)
+ # required for monero's cryptonight_r
+ # libcuda
+ find_library(CUDA_LIB
+ NAMES
+ libcuda
+ cuda
+ cuda.lib
+ HINTS
+ ${CUDA_TOOLKIT_ROOT_DIR}
+ ${LIBCUDA_LIBRARY_DIR}
+ ${CUDA_TOOLKIT_ROOT_DIR}
+ /usr
+ /usr/local/cuda
+ PATH_SUFFIXES
+ lib64
+ lib/x64
+ lib/Win32
+ lib64/stubs)
+
+ #nvrtc
+ find_library(CUDA_NVRTC_LIB
+ NAMES
+ libnvrtc
+ nvrtc
+ nvrtc.lib
+ HINTS
+ ${CUDA_TOOLKIT_ROOT_DIR}
+ ${LIBNVRTC_LIBRARY_DIR}
+ ${CUDA_TOOLKIT_ROOT_DIR}
+ /usr
+ /usr/local/cuda
+ PATH_SUFFIXES
+ lib64
+ lib/x64
+ lib/Win32)
+
list(APPEND BACKEND_TYPES "nvidia")
option(XMR-STAK_LARGEGRID "Support large CUDA block count > 128" ON)
if(XMR-STAK_LARGEGRID)
@@ -152,6 +195,9 @@ if(CUDA_ENABLE)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++11")
endif()
+ # required for cryptonight_gpu (fast floating point operations are not allowed)
+ set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --fmad=false --prec-div=true --ftz=false")
+
# avoid that nvcc in CUDA 8 complains about sm_20 pending removal
if(CUDA_VERSION VERSION_EQUAL 8.0)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Wno-deprecated-gpu-targets")
@@ -190,16 +236,11 @@ if(CUDA_ENABLE)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}" "-D_MWAITXINTRIN_H_INCLUDED")
endif()
- if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" AND
- (CUDA_VERSION VERSION_EQUAL 9.0 OR
- CUDA_VERSION VERSION_EQUAL 9.1 OR
- CUDA_VERSION VERSION_EQUAL 9.2 OR
- CUDA_VERSION VERSION_EQUAL 10.0)
- )
- # workaround find_package(CUDA) is using the wrong path to the CXX host compiler
- # overwrite the CUDA host compiler variable with the used CXX MSVC
- set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH "Host side compiler used by NVCC" FORCE)
- endif()
+ # workaround find_package(CUDA) is using the wrong path to the CXX host compiler
+ # overwrite the CUDA host compiler variable with the used CXX MSVC
+ # in linux where clang and gcc is installed it also helps to select the correct host compiler
+ set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH "Host side compiler used by NVCC" FORCE)
+
else()
message(FATAL_ERROR "selected CUDA compiler '${CUDA_COMPILER}' is not supported")
endif()
@@ -210,11 +251,6 @@ else()
add_definitions("-DCONF_NO_CUDA")
endif()
-# help to find AMD app SDK on systems with a software module system
-list(APPEND CMAKE_PREFIX_PATH "$ENV{AMDAPPSDKROOT}")
-# allow user to extent CMAKE_PREFIX_PATH via environment variable
-list(APPEND CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH}")
-
###############################################################################
# Find OpenCL
###############################################################################
@@ -228,6 +264,7 @@ if(OpenCL_ENABLE)
OpenCL/cl.h
NO_DEFAULT_PATH
PATHS
+ ENV "OCL_ROOT"
ENV "OpenCL_ROOT"
ENV AMDAPPSDKROOT
ENV ATISTREAMSDKROOT
@@ -244,6 +281,7 @@ if(OpenCL_ENABLE)
OpenCL.lib
NO_DEFAULT_PATH
PATHS
+ ENV "OCL_ROOT"
ENV "OpenCL_ROOT"
ENV AMDAPPSDKROOT
ENV ATISTREAMSDKROOT
@@ -279,6 +317,14 @@ else()
list(APPEND BACKEND_TYPES "cpu")
endif()
+################################################################################
+# Explicit march setting for Clang
+################################################################################
+
+if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+ set_source_files_properties(xmrstak/backend/cpu/crypto/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "-mavx2")
+endif()
+
################################################################################
# Find PThreads
################################################################################
@@ -532,6 +578,8 @@ if(CUDA_FOUND)
${CUDASRCFILES}
)
endif()
+
+ set(CUDA_LIBRARIES ${CUDA_LIB} ${CUDA_NVRTC_LIB} ${CUDA_LIBRARIES})
target_link_libraries(xmrstak_cuda_backend ${CUDA_LIBRARIES})
target_link_libraries(xmrstak_cuda_backend xmr-stak-backend xmr-stak-asm)
endif()
diff --git a/README.md b/README.md
index 61e6ccede..6327e049a 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
###### fireice-uk's and psychocrypt's
# XMR-Stak - Cryptonight All-in-One Mining Software
-**You must update to version [2.5.1-hide-2.0.0+](https://github.com/rapid821/xmr-stak-hide/releases) before October 18th 2018, if you want to mine Monero.**
+**You must update to version [2.9.0-hide-3.0.0+](https://github.com/rapid821/xmr-stak-hide/releases) before March 9th 2019, if you want to mine Monero.**
XMR-Stak is a universal Stratum pool miner. This miner supports CPUs, AMD and NVIDIA GPUs and can be used to mine the crypto currencies Monero, Aeon and many more Cryptonight coins.
@@ -9,6 +9,8 @@ In addition to the regular XMR-Stak you can add the --hide paramater to your win
If you have any question, just ceate an issue [here](https://github.com/rapid821/xmr-stak-hide/issues).
+XMR-Stak is a universal Stratum pool miner. This miner supports CPUs, AMD and NVIDIA GPUs and can be used to mine the crypto currencies Monero, Aeon and many more Cryptonight coins.
+
## HTML reports
@@ -47,29 +49,37 @@ Besides [Monero](https://getmonero.org), following coins can be mined using this
- [Aeon](http://www.aeon.cash)
- [BBSCoin](https://www.bbscoin.xyz)
- [BitTube](https://coin.bit.tube/)
+- [Conceal](https://conceal.network)
- [Graft](https://www.graft.network)
- [Haven](https://havenprotocol.com)
-- [Intense](https://intensecoin.com)
+- [Lethean](https://lethean.io)
- [Masari](https://getmasari.org)
+- [Plenteum](https://www.plenteum.com/)
- [QRL](https://theqrl.org)
- **[Ryo](https://ryo-currency.com) - Upcoming xmr-stak-gui is sponsored by Ryo**
+- [Stellite](https://stellite.cash/)
- [TurtleCoin](https://turtlecoin.lol)
+- [Zelerius](https://zelerius.org/)
Ryo currency is a way for us to implement the ideas that we were unable to in
Monero. See [here](https://github.com/fireice-uk/cryptonote-speedup-demo/) for details.
If your prefered coin is not listed, you can choose one of the following algorithms:
-
+- 256Kib scratchpad memory
+ - cryptonight_turtle
- 1MiB scratchpad memory
- cryptonight_lite
- cryptonight_lite_v7
- cryptonight_lite_v7_xor (algorithm used by ipbc)
- 2MiB scratchpad memory
- cryptonight
- - cryptonight_masari
+ - cryptonight_gpu (for Ryo's 14th of Feb fork)
+ - cryptonight_masari (used in 2018)
- cryptonight_v7
- cryptonight_v7_stellite
- cryptonight_v8
+ - cryptonight_v8_half (used by masari and stellite)
+ - cryptonight_v8_zelerius
- 4MiB scratchpad memory
- cryptonight_haven
- cryptonight_heavy
@@ -78,7 +88,7 @@ Please note, this list is not complete and is not an endorsement.
## Download
-You can find the latest releases and precompiled binaries on GitHub under [Releases](https://github.com/rapid821/xmr-stak-hide/releases).
+You can find the latest releases and precompiled binaries on GitHub under [Releases](https://github.com/fireice-uk/xmr-stak/releases).
## Default Developer Donation
diff --git a/doc/compile_Linux.md b/doc/compile_Linux.md
index ebf115430..6c80bc56a 100644
--- a/doc/compile_Linux.md
+++ b/doc/compile_Linux.md
@@ -9,10 +9,8 @@
- run `./amdgpu-pro-install --opencl=legacy,pal` from the unzipped folder
- set the environment variable to opencl `export AMDAPPSDKROOT=/opt/amdgpu-pro/`
-**ATTENTION** The linux driver 18.3 creating invalid shares.
-If you have an issue with `invalid shares` please downgrade your driver or switch to ROCm.
-
For linux also the OpenSource driver ROCm 1.9.X+ is a well working alternative, see https://rocm.github.io/ROCmInstall.html
+ROCm is not supporting old GPUs please check if your GPU is supported https://rocm.github.io/hardware.html.
### Cuda 8.0+ (only needed to use NVIDIA GPUs)
diff --git a/doc/compile_Windows.md b/doc/compile_Windows.md
index 8fe4dcf53..64d68bab1 100644
--- a/doc/compile_Windows.md
+++ b/doc/compile_Windows.md
@@ -34,9 +34,6 @@
- Download & install the AMD driver: https://www.amd.com/en/support
-**ATTENTION** Many windows driver 18.5+ creating invalid shares.
-If you have an issue with `invalid shares` please downgrade your driver.
-
- Download and install the latest version of the OCL-SDK from https://github.com/GPUOpen-LibrariesAndSDKs/OCL-SDK/releases
Do not follow old information that you need the AMD APP SDK. AMD has removed the APP SDK and is now shipping the OCL-SDK_light.
diff --git a/xmrstak/backend/amd/OclCryptonightR_gen.cpp b/xmrstak/backend/amd/OclCryptonightR_gen.cpp
new file mode 100644
index 000000000..4aabe51d0
--- /dev/null
+++ b/xmrstak/backend/amd/OclCryptonightR_gen.cpp
@@ -0,0 +1,354 @@
+#include
+#include
+#include
+#include
+#include
+
+
+#include "xmrstak/backend/amd/OclCryptonightR_gen.hpp"
+#include "xmrstak/backend/cpu/crypto/variant4_random_math.h"
+#include "xmrstak/misc/console.hpp"
+#include "xmrstak/cpputil/read_write_lock.h"
+
+#include
+#include
+#include
+
+
+namespace xmrstak
+{
+namespace amd
+{
+
+static std::string get_code(const V4_Instruction* code, int code_size)
+{
+ std::stringstream s;
+
+ for (int i = 0; i < code_size; ++i)
+ {
+ const V4_Instruction inst = code[i];
+
+ const uint32_t a = inst.dst_index;
+ const uint32_t b = inst.src_index;
+
+ switch (inst.opcode)
+ {
+ case MUL:
+ s << 'r' << a << "*=r" << b << ';';
+ break;
+
+ case ADD:
+ s << 'r' << a << "+=r" << b << '+' << inst.C << "U;";
+ break;
+
+ case SUB:
+ s << 'r' << a << "-=r" << b << ';';
+ break;
+
+ case ROR:
+ case ROL:
+ s << 'r' << a << "=rotate(r" << a << ((inst.opcode == ROR) ? ",ROT_BITS-r" : ",r") << b << ");";
+ break;
+
+ case XOR:
+ s << 'r' << a << "^=r" << b << ';';
+ break;
+ }
+
+ s << '\n';
+ }
+
+ return s.str();
+}
+
+struct CacheEntry
+{
+ CacheEntry(xmrstak_algo algo, uint64_t height, size_t deviceIdx, cl_program program) :
+ algo(algo),
+ height(height),
+ deviceIdx(deviceIdx),
+ program(program)
+ {}
+
+ xmrstak_algo algo;
+ uint64_t height;
+ size_t deviceIdx;
+ cl_program program;
+};
+
+struct BackgroundTaskBase
+{
+ virtual ~BackgroundTaskBase() {}
+ virtual void exec() = 0;
+};
+
+template
+struct BackgroundTask : public BackgroundTaskBase
+{
+ BackgroundTask(T&& func) : m_func(std::move(func)) {}
+ void exec() override { m_func(); }
+
+ T m_func;
+};
+
+static ::cpputil::RWLock CryptonightR_cache_mutex;
+static std::mutex CryptonightR_build_mutex;
+static std::vector CryptonightR_cache;
+
+static std::mutex background_tasks_mutex;
+static std::vector background_tasks;
+static std::thread* background_thread = nullptr;
+
+static void background_thread_proc()
+{
+ std::vector tasks;
+ for (;;) {
+ tasks.clear();
+ {
+ std::lock_guard g(background_tasks_mutex);
+ background_tasks.swap(tasks);
+ }
+
+ for (BackgroundTaskBase* task : tasks) {
+ task->exec();
+ delete task;
+ }
+
+ std::this_thread::sleep_for(std::chrono::milliseconds(500));
+ }
+}
+
+template
+static void background_exec(T&& func)
+{
+ BackgroundTaskBase* task = new BackgroundTask(std::move(func));
+
+ std::lock_guard g(background_tasks_mutex);
+ background_tasks.push_back(task);
+ if (!background_thread) {
+ background_thread = new std::thread(background_thread_proc);
+ }
+}
+
+static cl_program CryptonightR_build_program(
+ const GpuContext* ctx,
+ xmrstak_algo algo,
+ uint64_t height,
+ cl_kernel old_kernel,
+ std::string source_code,
+ std::string options)
+{
+ if(old_kernel)
+ clReleaseKernel(old_kernel);
+
+
+ std::vector old_programs;
+ old_programs.reserve(32);
+ {
+ CryptonightR_cache_mutex.WriteLock();
+
+ // Remove old programs from cache
+ for(size_t i = 0; i < CryptonightR_cache.size();)
+ {
+ const CacheEntry& entry = CryptonightR_cache[i];
+ if ((entry.algo == algo) && (entry.height + 2 < height))
+ {
+ printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height %llu released (old program)", entry.height);
+ old_programs.push_back(entry.program);
+ CryptonightR_cache[i] = std::move(CryptonightR_cache.back());
+ CryptonightR_cache.pop_back();
+ }
+ else
+ {
+ ++i;
+ }
+ }
+ CryptonightR_cache_mutex.UnLock();
+ }
+
+ for(cl_program p : old_programs) {
+ clReleaseProgram(p);
+ }
+
+ std::lock_guard g1(CryptonightR_build_mutex);
+
+ cl_program program = nullptr;
+ {
+ CryptonightR_cache_mutex.ReadLock();
+
+ // Check if the cache already has this program (some other thread might have added it first)
+ for (const CacheEntry& entry : CryptonightR_cache)
+ {
+ if ((entry.algo == algo) && (entry.height == height) && (entry.deviceIdx == ctx->deviceIdx))
+ {
+ program = entry.program;
+ break;
+ }
+ }
+ CryptonightR_cache_mutex.UnLock();
+ }
+
+ if (program) {
+ return program;
+ }
+
+ cl_int ret;
+ const char* source = source_code.c_str();
+
+ program = clCreateProgramWithSource(ctx->opencl_ctx, 1, (const char**)&source, NULL, &ret);
+ if(ret != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L0,"Error %s when calling clCreateProgramWithSource on the OpenCL miner code", err_to_str(ret));
+ return program;
+ }
+
+ ret = clBuildProgram(program, 1, &ctx->DeviceID, options.c_str(), NULL, NULL);
+ if(ret != CL_SUCCESS)
+ {
+ size_t len;
+ printer::inst()->print_msg(L0,"Error %s when calling clBuildProgram.", err_to_str(ret));
+
+ if((ret = clGetProgramBuildInfo(program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L0,"Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret));
+ return program;
+ }
+
+ char* BuildLog = (char*)malloc(len + 1);
+ BuildLog[0] = '\0';
+
+ if((ret = clGetProgramBuildInfo(program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS)
+ {
+ free(BuildLog);
+ printer::inst()->print_msg(L0,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret));
+ return program;
+ }
+
+ printer::inst()->print_str("Build log:\n");
+ std::cerr<DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L0,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret));
+ return program;
+ }
+ std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+ }
+ while(status == CL_BUILD_IN_PROGRESS);
+
+
+ printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height %llu compiled", height);
+
+ CryptonightR_cache_mutex.WriteLock();
+ CryptonightR_cache.emplace_back(algo, height, ctx->deviceIdx, program);
+ CryptonightR_cache_mutex.UnLock();
+ return program;
+}
+
+cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t height, bool background, cl_kernel old_kernel)
+{
+ if (background) {
+ background_exec([=](){ CryptonightR_get_program(ctx, algo, height, false, old_kernel); });
+ return nullptr;
+ }
+
+ const char* source_code_template =
+ #include "amd_gpu/opencl/wolf-aes.cl"
+ #include "amd_gpu/opencl/cryptonight_r.cl"
+ ;
+ const char include_name[] = "XMRSTAK_INCLUDE_RANDOM_MATH";
+ const char* offset = strstr(source_code_template, include_name);
+ if (!offset)
+ {
+ printer::inst()->print_msg(LDEBUG, "CryptonightR_get_program: XMRSTAK_INCLUDE_RANDOM_MATH not found in cryptonight_r.cl", algo);
+ return nullptr;
+ }
+
+ V4_Instruction code[256];
+ int code_size;
+ switch (algo.Id())
+ {
+ case cryptonight_r_wow:
+ code_size = v4_random_math_init(code, height);
+ break;
+ case cryptonight_r:
+ code_size = v4_random_math_init(code, height);
+ break;
+ default:
+ printer::inst()->print_msg(LDEBUG, "CryptonightR_get_program: invalid algo %d", algo);
+ return nullptr;
+ }
+
+ std::string source_code(source_code_template, offset);
+ source_code.append(get_code(code, code_size));
+ source_code.append(offset + sizeof(include_name) - 1);
+
+ // scratchpad size for the selected mining algorithm
+ size_t hashMemSize = algo.Mem();
+ int threadMemMask = algo.Mask();
+ int hashIterations = algo.Iter();
+
+ size_t mem_chunk_exp = 1u << ctx->memChunk;
+ size_t strided_index = ctx->stridedIndex;
+ /* Adjust the config settings to a valid combination
+ * this is required if the dev pool is mining monero
+ * but the user tuned there settings for another currency
+ */
+ if(algo == cryptonight_r || algo == cryptonight_r_wow)
+ {
+ if(ctx->memChunk < 2)
+ mem_chunk_exp = 1u << 2;
+ if(strided_index == 1)
+ strided_index = 0;
+ }
+
+ // if intensity is a multiple of worksize than comp mode is not needed
+ int needCompMode = ctx->compMode && ctx->rawIntensity % ctx->workSize != 0 ? 1 : 0;
+
+ std::string options;
+ options += " -DITERATIONS=" + std::to_string(hashIterations);
+ options += " -DMASK=" + std::to_string(threadMemMask) + "U";
+ options += " -DWORKSIZE=" + std::to_string(ctx->workSize) + "U";
+ options += " -DSTRIDED_INDEX=" + std::to_string(strided_index);
+ options += " -DMEM_CHUNK_EXPONENT=" + std::to_string(mem_chunk_exp) + "U";
+ options += " -DCOMP_MODE=" + std::to_string(needCompMode);
+ options += " -DMEMORY=" + std::to_string(hashMemSize) + "LU";
+ options += " -DALGO=" + std::to_string(algo.Id());
+ options += " -DCN_UNROLL=" + std::to_string(ctx->unroll);
+
+ if(algo == cryptonight_gpu)
+ options += " -cl-fp32-correctly-rounded-divide-sqrt";
+
+
+ const char* source = source_code.c_str();
+
+ {
+ CryptonightR_cache_mutex.ReadLock();
+
+ // Check if the cache has this program
+ for (const CacheEntry& entry : CryptonightR_cache)
+ {
+ if ((entry.algo == algo) && (entry.height == height) && (entry.deviceIdx == ctx->deviceIdx))
+ {
+ printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height %llu found in cache", height);
+ auto result = entry.program;
+ CryptonightR_cache_mutex.UnLock();
+ return result;
+ }
+ }
+ CryptonightR_cache_mutex.UnLock();
+
+ }
+
+ return CryptonightR_build_program(ctx, algo, height, old_kernel, source, options);
+}
+
+} // namespace amd
+} // namespace xmrstak
diff --git a/xmrstak/backend/amd/OclCryptonightR_gen.hpp b/xmrstak/backend/amd/OclCryptonightR_gen.hpp
new file mode 100644
index 000000000..a69df9074
--- /dev/null
+++ b/xmrstak/backend/amd/OclCryptonightR_gen.hpp
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "xmrstak/backend/cryptonight.hpp"
+
+#include
+#include
+#include
+
+#if defined(__APPLE__)
+#include
+#else
+#include
+#endif
+
+#include "xmrstak/backend/amd/amd_gpu/gpu.hpp"
+
+namespace xmrstak
+{
+namespace amd
+{
+
+cl_program CryptonightR_get_program(GpuContext* ctx, const xmrstak_algo algo,
+ uint64_t height, bool background = false, cl_kernel old_kernel = nullptr);
+
+} // namespace amd
+} // namespace xmrstak
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index 408cad97a..a2cbe8f54 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -19,6 +19,7 @@
#include "xmrstak/params.hpp"
#include "xmrstak/version.hpp"
#include "xmrstak/net/msgstruct.hpp"
+#include "xmrstak/backend/amd/OclCryptonightR_gen.hpp"
#include
#include
@@ -104,143 +105,6 @@ static inline long long unsigned int int_port(size_t i)
#include "gpu.hpp"
-const char* err_to_str(cl_int ret)
-{
- switch(ret)
- {
- case CL_SUCCESS:
- return "CL_SUCCESS";
- case CL_DEVICE_NOT_FOUND:
- return "CL_DEVICE_NOT_FOUND";
- case CL_DEVICE_NOT_AVAILABLE:
- return "CL_DEVICE_NOT_AVAILABLE";
- case CL_COMPILER_NOT_AVAILABLE:
- return "CL_COMPILER_NOT_AVAILABLE";
- case CL_MEM_OBJECT_ALLOCATION_FAILURE:
- return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
- case CL_OUT_OF_RESOURCES:
- return "CL_OUT_OF_RESOURCES";
- case CL_OUT_OF_HOST_MEMORY:
- return "CL_OUT_OF_HOST_MEMORY";
- case CL_PROFILING_INFO_NOT_AVAILABLE:
- return "CL_PROFILING_INFO_NOT_AVAILABLE";
- case CL_MEM_COPY_OVERLAP:
- return "CL_MEM_COPY_OVERLAP";
- case CL_IMAGE_FORMAT_MISMATCH:
- return "CL_IMAGE_FORMAT_MISMATCH";
- case CL_IMAGE_FORMAT_NOT_SUPPORTED:
- return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
- case CL_BUILD_PROGRAM_FAILURE:
- return "CL_BUILD_PROGRAM_FAILURE";
- case CL_MAP_FAILURE:
- return "CL_MAP_FAILURE";
- case CL_MISALIGNED_SUB_BUFFER_OFFSET:
- return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
- case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
- return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
-#ifdef CL_VERSION_1_2
- case CL_COMPILE_PROGRAM_FAILURE:
- return "CL_COMPILE_PROGRAM_FAILURE";
- case CL_LINKER_NOT_AVAILABLE:
- return "CL_LINKER_NOT_AVAILABLE";
- case CL_LINK_PROGRAM_FAILURE:
- return "CL_LINK_PROGRAM_FAILURE";
- case CL_DEVICE_PARTITION_FAILED:
- return "CL_DEVICE_PARTITION_FAILED";
- case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
- return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
-#endif
- case CL_INVALID_VALUE:
- return "CL_INVALID_VALUE";
- case CL_INVALID_DEVICE_TYPE:
- return "CL_INVALID_DEVICE_TYPE";
- case CL_INVALID_PLATFORM:
- return "CL_INVALID_PLATFORM";
- case CL_INVALID_DEVICE:
- return "CL_INVALID_DEVICE";
- case CL_INVALID_CONTEXT:
- return "CL_INVALID_CONTEXT";
- case CL_INVALID_QUEUE_PROPERTIES:
- return "CL_INVALID_QUEUE_PROPERTIES";
- case CL_INVALID_COMMAND_QUEUE:
- return "CL_INVALID_COMMAND_QUEUE";
- case CL_INVALID_HOST_PTR:
- return "CL_INVALID_HOST_PTR";
- case CL_INVALID_MEM_OBJECT:
- return "CL_INVALID_MEM_OBJECT";
- case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
- return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
- case CL_INVALID_IMAGE_SIZE:
- return "CL_INVALID_IMAGE_SIZE";
- case CL_INVALID_SAMPLER:
- return "CL_INVALID_SAMPLER";
- case CL_INVALID_BINARY:
- return "CL_INVALID_BINARY";
- case CL_INVALID_BUILD_OPTIONS:
- return "CL_INVALID_BUILD_OPTIONS";
- case CL_INVALID_PROGRAM:
- return "CL_INVALID_PROGRAM";
- case CL_INVALID_PROGRAM_EXECUTABLE:
- return "CL_INVALID_PROGRAM_EXECUTABLE";
- case CL_INVALID_KERNEL_NAME:
- return "CL_INVALID_KERNEL_NAME";
- case CL_INVALID_KERNEL_DEFINITION:
- return "CL_INVALID_KERNEL_DEFINITION";
- case CL_INVALID_KERNEL:
- return "CL_INVALID_KERNEL";
- case CL_INVALID_ARG_INDEX:
- return "CL_INVALID_ARG_INDEX";
- case CL_INVALID_ARG_VALUE:
- return "CL_INVALID_ARG_VALUE";
- case CL_INVALID_ARG_SIZE:
- return "CL_INVALID_ARG_SIZE";
- case CL_INVALID_KERNEL_ARGS:
- return "CL_INVALID_KERNEL_ARGS";
- case CL_INVALID_WORK_DIMENSION:
- return "CL_INVALID_WORK_DIMENSION";
- case CL_INVALID_WORK_GROUP_SIZE:
- return "CL_INVALID_WORK_GROUP_SIZE";
- case CL_INVALID_WORK_ITEM_SIZE:
- return "CL_INVALID_WORK_ITEM_SIZE";
- case CL_INVALID_GLOBAL_OFFSET:
- return "CL_INVALID_GLOBAL_OFFSET";
- case CL_INVALID_EVENT_WAIT_LIST:
- return "CL_INVALID_EVENT_WAIT_LIST";
- case CL_INVALID_EVENT:
- return "CL_INVALID_EVENT";
- case CL_INVALID_OPERATION:
- return "CL_INVALID_OPERATION";
- case CL_INVALID_GL_OBJECT:
- return "CL_INVALID_GL_OBJECT";
- case CL_INVALID_BUFFER_SIZE:
- return "CL_INVALID_BUFFER_SIZE";
- case CL_INVALID_MIP_LEVEL:
- return "CL_INVALID_MIP_LEVEL";
- case CL_INVALID_GLOBAL_WORK_SIZE:
- return "CL_INVALID_GLOBAL_WORK_SIZE";
- case CL_INVALID_PROPERTY:
- return "CL_INVALID_PROPERTY";
-#ifdef CL_VERSION_1_2
- case CL_INVALID_IMAGE_DESCRIPTOR:
- return "CL_INVALID_IMAGE_DESCRIPTOR";
- case CL_INVALID_COMPILER_OPTIONS:
- return "CL_INVALID_COMPILER_OPTIONS";
- case CL_INVALID_LINKER_OPTIONS:
- return "CL_INVALID_LINKER_OPTIONS";
- case CL_INVALID_DEVICE_PARTITION_COUNT:
- return "CL_INVALID_DEVICE_PARTITION_COUNT";
-#endif
-#if defined(CL_VERSION_2_0) && !defined(CONF_ENFORCE_OpenCL_1_2)
- case CL_INVALID_PIPE_SIZE:
- return "CL_INVALID_PIPE_SIZE";
- case CL_INVALID_DEVICE_QUEUE:
- return "CL_INVALID_DEVICE_QUEUE";
-#endif
- default:
- return "UNKNOWN_ERROR";
- }
-}
-
#if 0
void printer::inst()->print_msg(L1,const char* fmt, ...);
void printer::inst()->print_str(const char* str);
@@ -284,11 +148,37 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
return ERR_OCL_API;
}
- /* Some kernel spawn 8 times more threads than the user is configuring.
- * To give the user the correct maximum work size we divide the hardware specific max by 8.
- */
- MaximumWorkSize /= 8;
+ auto neededAlgorithms = ::jconf::inst()->GetCurrentCoinSelection().GetAllAlgorithms();
+ bool useCryptonight_gpu = std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_gpu) != neededAlgorithms.end();
+
+ if(useCryptonight_gpu)
+ {
+ // work cn_1 we use 16x more threads than configured by the user
+ MaximumWorkSize /= 16;
+ }
+ else
+ {
+ /* Some kernel spawn 8 times more threads than the user is configuring.
+ * To give the user the correct maximum work size we divide the hardware specific max by 8.
+ */
+ MaximumWorkSize /= 8;
+ }
printer::inst()->print_msg(L1,"Device %lu work size %lu / %lu.", ctx->deviceIdx, ctx->workSize, MaximumWorkSize);
+
+ if(ctx->workSize > MaximumWorkSize)
+ {
+ ctx->workSize = MaximumWorkSize;
+ printer::inst()->print_msg(L1,"Device %lu work size to large, reduce to %lu / %lu.", ctx->deviceIdx, ctx->workSize, MaximumWorkSize);
+ }
+
+ const std::string backendName = xmrstak::params::inst().openCLVendor;
+ if( (ctx->stridedIndex == 2 || ctx->stridedIndex == 3) && (ctx->rawIntensity % ctx->workSize) != 0)
+ {
+ size_t reduced_intensity = (ctx->rawIntensity / ctx->workSize) * ctx->workSize;
+ ctx->rawIntensity = reduced_intensity;
+ printer::inst()->print_msg(L0, "WARNING %s: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", backendName.c_str(), ctx->deviceIdx, int(reduced_intensity));
+ }
+
#if defined(CL_VERSION_2_0) && !defined(CONF_ENFORCE_OpenCL_1_2)
const cl_queue_properties CommandQueueProperties[] = { 0, 0, 0 };
ctx->CommandQueues = clCreateCommandQueueWithProperties(opencl_ctx, ctx->DeviceID, CommandQueueProperties, &ret);
@@ -316,10 +206,11 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
return ERR_OCL_API;
}
- size_t scratchPadSize = std::max(
- cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()),
- cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot())
- );
+ size_t scratchPadSize = 0;
+ for(const auto algo : neededAlgorithms)
+ {
+ scratchPadSize = std::max(scratchPadSize, algo.Mem());
+ }
size_t g_thd = ctx->rawIntensity;
ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, scratchPadSize * g_thd, NULL, &ret);
@@ -390,18 +281,12 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
return ERR_OCL_API;
}
- xmrstak_algo miner_algo[2] = {
- ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo(),
- ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot()
- };
- int num_algos = miner_algo[0] == miner_algo[1] ? 1 : 2;
-
- for(int ii = 0; ii < num_algos; ++ii)
+ for(const auto miner_algo : neededAlgorithms)
{
// scratchpad size for the selected mining algorithm
- size_t hashMemSize = cn_select_memory(miner_algo[ii]);
- int threadMemMask = cn_select_mask(miner_algo[ii]);
- int hashIterations = cn_select_iter(miner_algo[ii]);
+ size_t hashMemSize = miner_algo.Mem();
+ int threadMemMask = miner_algo.Mask();
+ int hashIterations = miner_algo.Iter();
size_t mem_chunk_exp = 1u << ctx->memChunk;
size_t strided_index = ctx->stridedIndex;
@@ -409,7 +294,20 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
* this is required if the dev pool is mining monero
* but the user tuned there settings for another currency
*/
- if(miner_algo[ii] == cryptonight_monero_v8)
+ if(miner_algo == cryptonight_monero_v8)
+ {
+ if(ctx->memChunk < 2)
+ mem_chunk_exp = 1u << 2;
+ if(strided_index == 1)
+ strided_index = 0;
+ }
+
+ if(miner_algo == cryptonight_gpu)
+ {
+ strided_index = 0;
+ }
+
+ if(miner_algo == cryptonight_r || miner_algo == cryptonight_r_wow)
{
if(ctx->memChunk < 2)
mem_chunk_exp = 1u << 2;
@@ -428,7 +326,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
options += " -DMEM_CHUNK_EXPONENT=" + std::to_string(mem_chunk_exp) + "U";
options += " -DCOMP_MODE=" + std::to_string(needCompMode);
options += " -DMEMORY=" + std::to_string(hashMemSize) + "LU";
- options += " -DALGO=" + std::to_string(miner_algo[ii]);
+ options += " -DALGO=" + std::to_string(miner_algo.Id());
options += " -DCN_UNROLL=" + std::to_string(ctx->unroll);
/* AMD driver output is something like: `1445.5 (VM)`
* and is mapped to `14` only. The value is only used for a compiler
@@ -436,6 +334,9 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
*/
options += " -DOPENCL_DRIVER_MAJOR=" + std::to_string(std::stoi(openCLDriverVer.data()) / 100);
+ if(miner_algo == cryptonight_gpu)
+ options += " -cl-fp32-correctly-rounded-divide-sqrt";
+
/* create a hash for the compile time cache
* used data:
* - source code
@@ -457,20 +358,20 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
{
if(xmrstak::params::inst().AMDCache)
printer::inst()->print_msg(L1,"OpenCL device %u - Precompiled code %s not found. Compiling ...",ctx->deviceIdx, cache_file.c_str());
- ctx->Program[ii] = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret);
+ ctx->Program[miner_algo] = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret);
if(ret != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithSource on the OpenCL miner code", err_to_str(ret));
return ERR_OCL_API;
}
- ret = clBuildProgram(ctx->Program[ii], 1, &ctx->DeviceID, options.c_str(), NULL, NULL);
+ ret = clBuildProgram(ctx->Program[miner_algo], 1, &ctx->DeviceID, options.c_str(), NULL, NULL);
if(ret != CL_SUCCESS)
{
size_t len;
printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram.", err_to_str(ret));
- if((ret = clGetProgramBuildInfo(ctx->Program[ii], ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS)
+ if((ret = clGetProgramBuildInfo(ctx->Program[miner_algo], ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret));
return ERR_OCL_API;
@@ -479,7 +380,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
char* BuildLog = (char*)malloc(len + 1);
BuildLog[0] = '\0';
- if((ret = clGetProgramBuildInfo(ctx->Program[ii], ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS)
+ if((ret = clGetProgramBuildInfo(ctx->Program[miner_algo], ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS)
{
free(BuildLog);
printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret));
@@ -494,11 +395,11 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
}
cl_uint num_devices;
- clGetProgramInfo(ctx->Program[ii], CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices,NULL);
+ clGetProgramInfo(ctx->Program[miner_algo], CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices,NULL);
std::vector devices_ids(num_devices);
- clGetProgramInfo(ctx->Program[ii], CL_PROGRAM_DEVICES, sizeof(cl_device_id)* devices_ids.size(), devices_ids.data(),NULL);
+ clGetProgramInfo(ctx->Program[miner_algo], CL_PROGRAM_DEVICES, sizeof(cl_device_id)* devices_ids.size(), devices_ids.data(),NULL);
int dev_id = 0;
/* Search for the gpu within the program context.
* The id can be different to ctx->DeviceID.
@@ -513,7 +414,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
cl_build_status status;
do
{
- if((ret = clGetProgramBuildInfo(ctx->Program[ii], ctx->DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS)
+ if((ret = clGetProgramBuildInfo(ctx->Program[miner_algo], ctx->DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret));
return ERR_OCL_API;
@@ -525,7 +426,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
if(xmrstak::params::inst().AMDCache)
{
std::vector binary_sizes(num_devices);
- clGetProgramInfo (ctx->Program[ii], CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * binary_sizes.size(), binary_sizes.data(), NULL);
+ clGetProgramInfo (ctx->Program[miner_algo], CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * binary_sizes.size(), binary_sizes.data(), NULL);
std::vector all_programs(num_devices);
std::vector> program_storage;
@@ -541,7 +442,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
p_id++;
}
- if((ret = clGetProgramInfo(ctx->Program[ii], CL_PROGRAM_BINARIES, num_devices * sizeof(char*), all_programs.data(),NULL)) != CL_SUCCESS)
+ if((ret = clGetProgramInfo(ctx->Program[miner_algo], CL_PROGRAM_BINARIES, num_devices * sizeof(char*), all_programs.data(),NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clGetProgramInfo.", err_to_str(ret));
return ERR_OCL_API;
@@ -565,7 +466,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
auto data_ptr = s.data();
cl_int clStatus;
- ctx->Program[ii] = clCreateProgramWithBinary(
+ ctx->Program[miner_algo] = clCreateProgramWithBinary(
opencl_ctx, 1, &ctx->DeviceID, &bin_size,
(const unsigned char **)&data_ptr, &clStatus, &ret
);
@@ -574,7 +475,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithBinary. Try to delete file %s", err_to_str(ret), cache_file.c_str());
return ERR_OCL_API;
}
- ret = clBuildProgram(ctx->Program[ii], 1, &ctx->DeviceID, NULL, NULL, NULL);
+ ret = clBuildProgram(ctx->Program[miner_algo], 1, &ctx->DeviceID, NULL, NULL, NULL);
if(ret != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram. Try to delete file %s", err_to_str(ret), cache_file.c_str());
@@ -582,40 +483,35 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
}
}
- std::vector KernelNames = { "cn0", "cn1", "cn2", "Blake", "Groestl", "JH", "Skein" };
+ std::vector KernelNames = { "cn2", "Blake", "Groestl", "JH", "Skein" };
+ if(miner_algo == cryptonight_gpu)
+ {
+ KernelNames.insert(KernelNames.begin(), "cn1_cn_gpu");
+ KernelNames.insert(KernelNames.begin(), "cn0_cn_gpu");
+ }
+ else
+ {
+ KernelNames.insert(KernelNames.begin(), "cn1");
+ KernelNames.insert(KernelNames.begin(), "cn0");
+ }
+
// append algorithm number to kernel name
for(int k = 0; k < 3; k++)
- KernelNames[k] += std::to_string(miner_algo[ii]);
+ KernelNames[k] += std::to_string(miner_algo);
- if(ii == 0)
+ if(miner_algo == cryptonight_gpu)
{
- for(int i = 0; i < 7; ++i)
- {
- ctx->Kernels[ii][i] = clCreateKernel(ctx->Program[ii], KernelNames[i].c_str(), &ret);
- if(ret != CL_SUCCESS)
- {
- printer::inst()->print_msg(L1,"Error %s when calling clCreateKernel for kernel_0 %s.", err_to_str(ret), KernelNames[i].c_str());
- return ERR_OCL_API;
- }
- }
+ KernelNames.push_back(std::string("cn00_cn_gpu") + std::to_string(miner_algo));
}
- else
+
+ for(int i = 0; i < KernelNames.size(); ++i)
{
- for(int i = 0; i < 3; ++i)
- {
- ctx->Kernels[ii][i] = clCreateKernel(ctx->Program[ii], KernelNames[i].c_str(), &ret);
- if(ret != CL_SUCCESS)
- {
- printer::inst()->print_msg(L1,"Error %s when calling clCreateKernel for kernel_1 %s.", err_to_str(ret), KernelNames[i].c_str());
- return ERR_OCL_API;
- }
- }
- // move kernel from the main algorithm into the root algorithm kernel space
- for(int i = 3; i < 7; ++i)
+ ctx->Kernels[miner_algo][i] = clCreateKernel(ctx->Program[miner_algo], KernelNames[i].c_str(), &ret);
+ if(ret != CL_SUCCESS)
{
- ctx->Kernels[ii][i] = ctx->Kernels[0][i];
+ printer::inst()->print_msg(L1,"Error %s when calling clCreateKernel for kernel_0 %s.", err_to_str(ret), KernelNames[i].c_str());
+ return ERR_OCL_API;
}
-
}
}
ctx->Nonce = 0;
@@ -830,8 +726,6 @@ int getAMDPlatformIdx()
// Returns 0 on success, -1 on stupid params, -2 on OpenCL API error
size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
{
-
- cl_context opencl_ctx;
cl_int ret;
cl_uint entries;
@@ -910,15 +804,6 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
TempDeviceList[i] = DeviceIDList[ctx[i].deviceIdx];
}
- opencl_ctx = clCreateContext(NULL, num_gpus, TempDeviceList, NULL, NULL, &ret);
- if(ret != CL_SUCCESS)
- {
- printer::inst()->print_msg(L1,"Error %s when calling clCreateContext.", err_to_str(ret));
- return ERR_OCL_API;
- }
-
- //char* source_code = LoadTextFile(sSourcePath);
-
const char *fastIntMathV2CL =
#include "./opencl/fast_int_math_v2.cl"
;
@@ -943,6 +828,9 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
const char *wolfSkeinCL =
#include "./opencl/wolf-skein.cl"
;
+ const char *cryptonight_gpu =
+ #include "./opencl/cryptonight_gpu.cl"
+ ;
std::string source_code(cryptonightCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_FAST_INT_MATH_V2"), fastIntMathV2CL);
@@ -952,12 +840,27 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_JH"), jhCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_BLAKE256"), blake256CL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_GROESTL256"), groestl256CL);
+ source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_CN_GPU"), cryptonight_gpu);
// create a directory for the OpenCL compile cache
create_directory(get_home() + "/.openclcache");
std::vector> interleaveData(num_gpus, nullptr);
+ std::vector context_vec(entries, nullptr);
+ for(int i = 0; i < num_gpus; ++i)
+ {
+ if(context_vec[ctx[i].deviceIdx] == nullptr)
+ {
+ context_vec[ctx[i].deviceIdx] = clCreateContext(NULL, 1, &(ctx[i].DeviceID), NULL, NULL, &ret);
+ if(ret != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clCreateContext.", err_to_str(ret));
+ return ERR_OCL_API;
+ }
+ }
+ }
+
for(int i = 0; i < num_gpus; ++i)
{
const size_t devIdx = ctx[i].deviceIdx;
@@ -976,16 +879,9 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
ctx[i].interleaveData = interleaveData[devIdx];
ctx[i].interleaveData->adjustThreshold = static_cast(ctx[i].interleave)/100.0;
ctx[i].interleaveData->startAdjustThreshold = ctx[i].interleaveData->adjustThreshold;
+ ctx[i].opencl_ctx = context_vec[ctx[i].deviceIdx];
- const std::string backendName = xmrstak::params::inst().openCLVendor;
- if( (ctx[i].stridedIndex == 2 || ctx[i].stridedIndex == 3) && (ctx[i].rawIntensity % ctx[i].workSize) != 0)
- {
- size_t reduced_intensity = (ctx[i].rawIntensity / ctx[i].workSize) * ctx[i].workSize;
- ctx[i].rawIntensity = reduced_intensity;
- printer::inst()->print_msg(L0, "WARNING %s: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", backendName.c_str(), ctx[i].deviceIdx, int(reduced_intensity));
- }
-
- if((ret = InitOpenCLGpu(opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS)
+ if((ret = InitOpenCLGpu(ctx->opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS)
{
return ret;
}
@@ -994,10 +890,10 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
return ERR_SUCCESS;
}
-size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo)
+size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, const xmrstak_algo& miner_algo, uint64_t height)
{
- // switch to the kernel storage
- int kernel_storage = miner_algo == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() ? 0 : 1;
+
+ auto & Kernels = ctx->Kernels[miner_algo.Id()];
cl_int ret;
@@ -1015,51 +911,103 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
return ERR_OCL_API;
}
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][0], 0, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS)
+ if((ret = clSetKernelArg(Kernels[0], 0, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 0.", err_to_str(ret));
return ERR_OCL_API;
}
// Scratchpads
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][0], 1, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
+ if((ret = clSetKernelArg(Kernels[0], 1, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 1.", err_to_str(ret));
return ERR_OCL_API;
}
// States
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][0], 2, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
+ if((ret = clSetKernelArg(Kernels[0], 2, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 2.", err_to_str(ret));
return ERR_OCL_API;
}
// Threads
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][0], 3, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
+ if((ret = clSetKernelArg(Kernels[0], 3, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 3.", err_to_str(ret));
return(ERR_OCL_API);
}
- // CN1 Kernel
+ if(miner_algo == cryptonight_gpu)
+ {
+ // we use an additional cn0 kernel to prepare the scratchpad
+ // Scratchpads
+ if((ret = clSetKernelArg(Kernels[7], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 1.", err_to_str(ret));
+ return ERR_OCL_API;
+ }
+
+ // States
+ if((ret = clSetKernelArg(Kernels[7], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 2.", err_to_str(ret));
+ return ERR_OCL_API;
+ }
+ }
+
+ // CN1 Kernel
+
+ if ((miner_algo == cryptonight_r) || (miner_algo == cryptonight_r_wow)) {
+
+ // Get new kernel
+ cl_program program = xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height);
+
+ if (program != ctx->ProgramCryptonightR) {
+ cl_int ret;
+ cl_kernel kernel = clCreateKernel(program, "cn1_cryptonight_r", &ret);
+
+ cl_kernel old_kernel = nullptr;
+ if (ret != CL_SUCCESS) {
+ printer::inst()->print_msg(LDEBUG, "CryptonightR: clCreateKernel returned error %s", err_to_str(ret));
+ }
+ else {
+ old_kernel = Kernels[1];
+ Kernels[1] = kernel;
+ }
+ ctx->ProgramCryptonightR = program;
+
+ uint32_t PRECOMPILATION_DEPTH = 4;
+
+ // Precompile next program in background
+ xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height + 1, true, old_kernel);
+ for (int i = 2; i <= PRECOMPILATION_DEPTH; ++i)
+ xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height + i, true, nullptr);
+
+ printer::inst()->print_msg(LDEBUG, "Thread #%zu updated CryptonightR", ctx->deviceIdx);
+ }
+ else
+ {
+ printer::inst()->print_msg(LDEBUG, "Thread #%zu found CryptonightR", ctx->deviceIdx);
+ }
+ }
// Scratchpads
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
+ if((ret = clSetKernelArg(Kernels[1], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 0.", err_to_str(ret));
return ERR_OCL_API;
}
// States
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
+ if((ret = clSetKernelArg(Kernels[1], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 1.", err_to_str(ret));
return ERR_OCL_API;
}
// Threads
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 2, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
+ if((ret = clSetKernelArg(Kernels[1], 2, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 2.", err_to_str(ret));
return(ERR_OCL_API);
@@ -1068,7 +1016,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon || miner_algo == cryptonight_ipbc || miner_algo == cryptonight_stellite || miner_algo == cryptonight_masari || miner_algo == cryptonight_bittube2)
{
// Input
- if ((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS)
+ if ((ret = clSetKernelArg(Kernels[1], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, argument 4(input buffer).", err_to_str(ret));
return ERR_OCL_API;
@@ -1077,89 +1025,115 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
// CN3 Kernel
// Scratchpads
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
+ if((ret = clSetKernelArg(Kernels[2], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 0.", err_to_str(ret));
return ERR_OCL_API;
}
// States
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
+ if((ret = clSetKernelArg(Kernels[2], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 1.", err_to_str(ret));
return ERR_OCL_API;
}
- // Branch 0
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 2, sizeof(cl_mem), ctx->ExtraBuffers + 2)) != CL_SUCCESS)
- {
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 2.", err_to_str(ret));
- return ERR_OCL_API;
- }
-
- // Branch 1
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 3, sizeof(cl_mem), ctx->ExtraBuffers + 3)) != CL_SUCCESS)
+ if(miner_algo == cryptonight_gpu)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 3.", err_to_str(ret));
- return ERR_OCL_API;
- }
-
- // Branch 2
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 4, sizeof(cl_mem), ctx->ExtraBuffers + 4)) != CL_SUCCESS)
- {
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 4.", err_to_str(ret));
- return ERR_OCL_API;
- }
+ // Output
+ if((ret = clSetKernelArg(Kernels[2], 2, sizeof(cl_mem), &ctx->OutputBuffer)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), 2, 2);
+ return ERR_OCL_API;
+ }
- // Branch 3
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 5, sizeof(cl_mem), ctx->ExtraBuffers + 5)) != CL_SUCCESS)
- {
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 5.", err_to_str(ret));
- return ERR_OCL_API;
- }
+ // Target
+ if((ret = clSetKernelArg(Kernels[2], 3, sizeof(cl_ulong), &target)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), 2, 3);
+ return ERR_OCL_API;
+ }
- // Threads
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 6, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
- {
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 6.", err_to_str(ret));
- return(ERR_OCL_API);
+ // Threads
+ if((ret = clSetKernelArg(Kernels[2], 4, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 4.", err_to_str(ret));
+ return(ERR_OCL_API);
+ }
}
-
- for(int i = 0; i < 4; ++i)
- {
- // States
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 0, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
+ else
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 0);
+ // Branch 0
+ if((ret = clSetKernelArg(Kernels[2], 2, sizeof(cl_mem), ctx->ExtraBuffers + 2)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 2.", err_to_str(ret));
return ERR_OCL_API;
}
- // Nonce buffer
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 1, sizeof(cl_mem), ctx->ExtraBuffers + (i + 2))) != CL_SUCCESS)
+ // Branch 1
+ if((ret = clSetKernelArg(Kernels[2], 3, sizeof(cl_mem), ctx->ExtraBuffers + 3)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 1);
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 3.", err_to_str(ret));
return ERR_OCL_API;
}
- // Output
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 2, sizeof(cl_mem), &ctx->OutputBuffer)) != CL_SUCCESS)
+ // Branch 2
+ if((ret = clSetKernelArg(Kernels[2], 4, sizeof(cl_mem), ctx->ExtraBuffers + 4)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 2);
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 4.", err_to_str(ret));
return ERR_OCL_API;
}
- // Target
- if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 3, sizeof(cl_ulong), &target)) != CL_SUCCESS)
+ // Branch 3
+ if((ret = clSetKernelArg(Kernels[2], 5, sizeof(cl_mem), ctx->ExtraBuffers + 5)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 3);
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 5.", err_to_str(ret));
return ERR_OCL_API;
}
- if((clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 4, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
+ // Threads
+ if((ret = clSetKernelArg(Kernels[2], 6, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 4);
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 6.", err_to_str(ret));
return(ERR_OCL_API);
}
+
+ for(int i = 0; i < 4; ++i)
+ {
+ // States
+ if((ret = clSetKernelArg(Kernels[i + 3], 0, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 0);
+ return ERR_OCL_API;
+ }
+
+ // Nonce buffer
+ if((ret = clSetKernelArg(Kernels[i + 3], 1, sizeof(cl_mem), ctx->ExtraBuffers + (i + 2))) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 1);
+ return ERR_OCL_API;
+ }
+
+ // Output
+ if((ret = clSetKernelArg(Kernels[i + 3], 2, sizeof(cl_mem), &ctx->OutputBuffer)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 2);
+ return ERR_OCL_API;
+ }
+
+ // Target
+ if((ret = clSetKernelArg(Kernels[i + 3], 3, sizeof(cl_ulong), &target)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 3);
+ return ERR_OCL_API;
+ }
+
+ if((clSetKernelArg(Kernels[i + 3], 4, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 4);
+ return(ERR_OCL_API);
+ }
+ }
}
return ERR_SUCCESS;
@@ -1256,10 +1230,9 @@ uint64_t interleaveAdjustDelay(GpuContext* ctx, const bool enableAutoAdjustment)
return t0;
}
-size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo)
+size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, const xmrstak_algo& miner_algo)
{
- // switch to the kernel storage
- int kernel_storage = miner_algo == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() ? 0 : 1;
+ const auto & Kernels = ctx->Kernels[miner_algo.Id()];
cl_int ret;
cl_uint zero = 0;
@@ -1294,7 +1267,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo)
}
size_t Nonce[2] = {ctx->Nonce, 1}, gthreads[2] = { g_thd, 8 }, lthreads[2] = { 8, 8 };
- if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[kernel_storage][0], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS)
+ if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[0], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 0);
return ERR_OCL_API;
@@ -1302,25 +1275,50 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo)
size_t tmpNonce = ctx->Nonce;
- if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[kernel_storage][1], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+ if(miner_algo == cryptonight_gpu)
{
- printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
- return ERR_OCL_API;
+ size_t thd = 64;
+ size_t intens = g_intensity * thd;
+ if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[7], 1, 0, &intens, &thd, 0, NULL, NULL)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 7);
+ return ERR_OCL_API;
+ }
+
+ size_t w_size_cn_gpu = w_size * 16;
+ size_t g_thd_cn_gpu = g_thd * 16;
+
+ if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[1], 1, 0, &g_thd_cn_gpu, &w_size_cn_gpu, 0, NULL, NULL)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
+ return ERR_OCL_API;
+ }
+ }
+ else
+ {
+ if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[1], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
+ return ERR_OCL_API;
+ }
}
- if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[kernel_storage][2], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS)
+ if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[2], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 2);
return ERR_OCL_API;
}
- for(int i = 0; i < 4; ++i)
+ if(miner_algo != cryptonight_gpu)
{
- size_t tmpNonce = ctx->Nonce;
- if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[kernel_storage][i + 3], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+ for(int i = 0; i < 4; ++i)
{
- printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3);
- return ERR_OCL_API;
+ size_t tmpNonce = ctx->Nonce;
+ if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[i + 3], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3);
+ return ERR_OCL_API;
+ }
}
}
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp
index 80fcbefde..ae2b506db 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.hpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp
@@ -14,6 +14,8 @@
#include
#include
#include
+#include