diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index 8c1bb8b5d..1eede3cfa 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -789,6 +789,8 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 			// 8 is chosen by checking the occupancy calculator
 			size_t blockOptimal = 8 * ctx->device_mpcount;
 
+			if(gpuArch == 30)
+				blockOptimal = 8 * ctx->device_mpcount;
 			// the following values are calculated with CUDA10 and the occupancy calculator
 			if(gpuArch == 35 || gpuArch / 10 == 5 || gpuArch / 10 == 6)
 				blockOptimal = 7 * ctx->device_mpcount;
@@ -798,26 +800,17 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 				blockOptimal = 6 * ctx->device_mpcount;
 
 			if(blockOptimal * threads * hashMemSize < limitedMemory)
-			{
-				ctx->device_threads = threads;
 				ctx->device_blocks = blockOptimal;
-			}
+			else
+				ctx->device_blocks = limitedMemory / hashMemSize / threads; // round to a memory fitting value
+			ctx->device_threads = threads;
 		}
 	}
 
-	if(useCryptonight_gpu)
-	{
-		// cryptonight_gpu used 16 threads per share
-		if(ctx->device_threads * 16 > ctx->device_maxThreadsPerBlock)
-		{
-			ctx->device_threads = ctx->device_maxThreadsPerBlock / 16;
-			printf("WARNING: 'threads' configuration to large, value adjusted to %i\n", ctx->device_threads);
-		}
-	}
-	else if(ctx->device_threads * 8 > ctx->device_maxThreadsPerBlock)
+	if(ctx->device_threads * threadsPerHash > ctx->device_maxThreadsPerBlock)
 	{
 		// by default cryptonight CUDA implementations uses 8 threads per thread for some kernel
-		ctx->device_threads = ctx->device_maxThreadsPerBlock / 8;
+		ctx->device_threads = ctx->device_maxThreadsPerBlock / threadsPerHash;
 		printf("WARNING: 'threads' configuration to large, value adjusted to %i\n", ctx->device_threads);
 	}
 	printf("device init succeeded\n");