diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index 8c1bb8b5d..1eede3cfa 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -789,6 +789,8 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) // 8 is chosen by checking the occupancy calculator size_t blockOptimal = 8 * ctx->device_mpcount; + if(gpuArch == 30) + blockOptimal = 8 * ctx->device_mpcount; // the following values are calculated with CUDA10 and the occupancy calculator if(gpuArch == 35 || gpuArch / 10 == 5 || gpuArch / 10 == 6) blockOptimal = 7 * ctx->device_mpcount; @@ -798,26 +800,17 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) blockOptimal = 6 * ctx->device_mpcount; if(blockOptimal * threads * hashMemSize < limitedMemory) - { - ctx->device_threads = threads; ctx->device_blocks = blockOptimal; - } + else + ctx->device_blocks = limitedMemory / hashMemSize / threads; // round to a memory fitting value + ctx->device_threads = threads; } } - if(useCryptonight_gpu) - { - // cryptonight_gpu used 16 threads per share - if(ctx->device_threads * 16 > ctx->device_maxThreadsPerBlock) - { - ctx->device_threads = ctx->device_maxThreadsPerBlock / 16; - printf("WARNING: 'threads' configuration to large, value adjusted to %i\n", ctx->device_threads); - } - } - else if(ctx->device_threads * 8 > ctx->device_maxThreadsPerBlock) + if(ctx->device_threads * threadsPerHash > ctx->device_maxThreadsPerBlock) { // by default cryptonight CUDA implementations uses 8 threads per thread for some kernel - ctx->device_threads = ctx->device_maxThreadsPerBlock / 8; + ctx->device_threads = ctx->device_maxThreadsPerBlock / threadsPerHash; printf("WARNING: 'threads' configuration to large, value adjusted to %i\n", ctx->device_threads); } printf("device init succeeded\n");