From 1cbcc8288f5e6f3b7e32edc738551d00ff4e82c9 Mon Sep 17 00:00:00 2001 From: "C. S." <76898260+Pentalimbed@users.noreply.github.com> Date: Sun, 4 Aug 2024 16:36:05 +0100 Subject: [PATCH] feat: resolution-adaptive llf cluster count (#376) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: semi-dynamic llf cluster count * style: 🎨 apply clang-format changes * s * fix: fix ClusterCulling * chore: remove comments * fix: potential VR consistency fix --------- Co-authored-by: Pentalimbed --- .../LightLimitFix/ClusterCullingCS.hlsl | 42 +++++++++++-------- .../Shaders/LightLimitFix/Common.hlsli | 9 ++-- .../Shaders/LightLimitFix/LightLimitFix.hlsli | 9 ++-- package/Shaders/Common/SharedData.hlsli | 2 + src/Features/LightLimitFix.cpp | 38 +++++++++++------ src/Features/LightLimitFix.h | 4 ++ 6 files changed, 66 insertions(+), 38 deletions(-) diff --git a/features/Light Limit Fix/Shaders/LightLimitFix/ClusterCullingCS.hlsl b/features/Light Limit Fix/Shaders/LightLimitFix/ClusterCullingCS.hlsl index 59d52c5f6..7d5fc3567 100644 --- a/features/Light Limit Fix/Shaders/LightLimitFix/ClusterCullingCS.hlsl +++ b/features/Light Limit Fix/Shaders/LightLimitFix/ClusterCullingCS.hlsl @@ -11,9 +11,9 @@ cbuffer PerFrame : register(b0) StructuredBuffer clusters : register(t0); StructuredBuffer lights : register(t1); -RWStructuredBuffer lightIndexCounter : register(u0); //1 -RWStructuredBuffer lightIndexList : register(u1); //MAX_CLUSTER_LIGHTS * 16^3 -RWStructuredBuffer lightGrid : register(u2); //16^3 +RWStructuredBuffer lightIndexCounter : register(u0); +RWStructuredBuffer lightIndexList : register(u1); +RWStructuredBuffer lightGrid : register(u2); groupshared StructuredLight sharedLights[GROUP_SIZE]; @@ -25,14 +25,15 @@ bool LightIntersectsCluster(StructuredLight light, ClusterAABB cluster, int eyeI return dot(dist, dist) <= (light.radius * light.radius); } -[numthreads(16, 8, 8)] void main(uint3 groupId - : SV_GroupID, - uint3 
dispatchThreadId - : SV_DispatchThreadID, - uint3 groupThreadId - : SV_GroupThreadID, - uint groupIndex - : SV_GroupIndex) { +[numthreads(NUMTHREAD_X, NUMTHREAD_Y, NUMTHREAD_Z)] void main( + uint3 groupId + : SV_GroupID, uint3 dispatchThreadId + : SV_DispatchThreadID, uint3 groupThreadId + : SV_GroupThreadID, uint groupIndex + : SV_GroupIndex) { + if (any(dispatchThreadId >= uint3(CLUSTER_BUILDING_DISPATCH_SIZE_X, CLUSTER_BUILDING_DISPATCH_SIZE_Y, CLUSTER_BUILDING_DISPATCH_SIZE_Z))) + return; + if (all(dispatchThreadId == 0)) { lightIndexCounter[0] = 0; } @@ -40,7 +41,9 @@ bool LightIntersectsCluster(StructuredLight light, ClusterAABB cluster, int eyeI uint visibleLightCount = 0; uint visibleLightIndices[MAX_CLUSTER_LIGHTS]; - uint clusterIndex = groupIndex + GROUP_SIZE * groupId.z; + uint clusterIndex = dispatchThreadId.x + + dispatchThreadId.y * CLUSTER_BUILDING_DISPATCH_SIZE_X + + dispatchThreadId.z * (CLUSTER_BUILDING_DISPATCH_SIZE_X * CLUSTER_BUILDING_DISPATCH_SIZE_Y); ClusterAABB cluster = clusters[clusterIndex]; @@ -61,11 +64,13 @@ bool LightIntersectsCluster(StructuredLight light, ClusterAABB cluster, int eyeI for (uint i = 0; i < batchSize; i++) { StructuredLight light = lights[i]; - if (visibleLightCount < MAX_CLUSTER_LIGHTS && (LightIntersectsCluster(light, cluster) + bool updateCluster = LightIntersectsCluster(light, cluster); #ifdef VR - || LightIntersectsCluster(light, cluster, 1) + updateCluster = updateCluster || LightIntersectsCluster(light, cluster, 1); #endif // VR - )) { + updateCluster = updateCluster && (visibleLightCount < MAX_CLUSTER_LIGHTS); + + if (updateCluster) { visibleLightIndices[visibleLightCount] = lightOffset + i; visibleLightCount++; } @@ -83,8 +88,11 @@ bool LightIntersectsCluster(StructuredLight light, ClusterAABB cluster, int eyeI lightIndexList[offset + i] = visibleLightIndices[i]; } - lightGrid[clusterIndex].offset = offset; - lightGrid[clusterIndex].lightCount = visibleLightCount; + LightGrid output = { + offset, 
visibleLightCount, 0, 0 + }; + + lightGrid[clusterIndex] = output; } //https://www.3dgep.com/forward-plus/#Grid_Frustums_Compute_Shader diff --git a/features/Light Limit Fix/Shaders/LightLimitFix/Common.hlsli b/features/Light Limit Fix/Shaders/LightLimitFix/Common.hlsli index df0c52f61..7e84f1df7 100644 --- a/features/Light Limit Fix/Shaders/LightLimitFix/Common.hlsli +++ b/features/Light Limit Fix/Shaders/LightLimitFix/Common.hlsli @@ -1,11 +1,10 @@ -#define GROUP_SIZE (16 * 16 * 4) +#define NUMTHREAD_X 16 +#define NUMTHREAD_Y 16 +#define NUMTHREAD_Z 4 +#define GROUP_SIZE (NUMTHREAD_X * NUMTHREAD_Y * NUMTHREAD_Z) #define MAX_CLUSTER_LIGHTS 128 -#define CLUSTER_BUILDING_DISPATCH_SIZE_X 16 -#define CLUSTER_BUILDING_DISPATCH_SIZE_Y 16 -#define CLUSTER_BUILDING_DISPATCH_SIZE_Z 16 - struct ClusterAABB { float4 minPoint; diff --git a/features/Light Limit Fix/Shaders/LightLimitFix/LightLimitFix.hlsli b/features/Light Limit Fix/Shaders/LightLimitFix/LightLimitFix.hlsli index d48fb9574..66576d0e9 100644 --- a/features/Light Limit Fix/Shaders/LightLimitFix/LightLimitFix.hlsli +++ b/features/Light Limit Fix/Shaders/LightLimitFix/LightLimitFix.hlsli @@ -31,15 +31,16 @@ StructuredBuffer strictLights : register(t53); bool GetClusterIndex(in float2 uv, in float z, out uint clusterIndex) { + const uint3 clusterSize = lightLimitFixSettings.ClusterSize.xyz; + if (z < strictLights[0].LightsNear || z > strictLights[0].LightsFar) return false; float clampedZ = clamp(z, strictLights[0].LightsNear, strictLights[0].LightsFar); - uint clusterZ = uint(max((log2(z) - log2(strictLights[0].LightsNear)) * 16.0 / log2(strictLights[0].LightsFar / strictLights[0].LightsNear), 0.0)); - uint2 clusterDim = ceil(BufferDim / float2(16, 16)); - uint3 cluster = uint3(uint2((uv * BufferDim) / clusterDim), clusterZ); + uint clusterZ = uint(max((log2(z) - log2(strictLights[0].LightsNear)) * clusterSize.z / log2(strictLights[0].LightsFar / strictLights[0].LightsNear), 0.0)); + uint3 cluster = uint3(uint2(uv 
* clusterSize.xy), clusterZ); - clusterIndex = cluster.x + (16 * cluster.y) + (16 * 16 * cluster.z); + clusterIndex = cluster.x + (clusterSize.x * cluster.y) + (clusterSize.x * clusterSize.y * cluster.z); return true; } diff --git a/package/Shaders/Common/SharedData.hlsli b/package/Shaders/Common/SharedData.hlsli index 0830ed5c0..6d68c1772 100644 --- a/package/Shaders/Common/SharedData.hlsli +++ b/package/Shaders/Common/SharedData.hlsli @@ -112,6 +112,8 @@ struct LightLimitFixSettings uint EnableLightsVisualisation; uint LightsVisualisationMode; uint pad0; + + uint4 ClusterSize; }; # define SL_INCL_STRUCT diff --git a/src/Features/LightLimitFix.cpp b/src/Features/LightLimitFix.cpp index a5ed12417..76e6608af 100644 --- a/src/Features/LightLimitFix.cpp +++ b/src/Features/LightLimitFix.cpp @@ -5,13 +5,8 @@ #include "State.h" #include "Util.h" -static constexpr uint CLUSTER_SIZE_X = 16; -static constexpr uint CLUSTER_SIZE_Y = 16; -static constexpr uint CLUSTER_SIZE_Z = 16; constexpr uint CLUSTER_MAX_LIGHTS = 128; -constexpr std::uint32_t CLUSTER_COUNT = CLUSTER_SIZE_X * CLUSTER_SIZE_Y * CLUSTER_SIZE_Z; - static constexpr uint MAX_LIGHTS = 2048; NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( @@ -125,14 +120,33 @@ LightLimitFix::PerFrame LightLimitFix::GetCommonBufferData() perFrame.EnableContactShadows = settings.EnableContactShadows; perFrame.EnableLightsVisualisation = settings.EnableLightsVisualisation; perFrame.LightsVisualisationMode = settings.LightsVisualisationMode; + std::copy(clusterSize, clusterSize + 3, perFrame.ClusterSize); return perFrame; } void LightLimitFix::SetupResources() { + auto screenSize = Util::ConvertToDynamic(State::GetSingleton()->screenSize); + if (REL::Module::IsVR()) + screenSize.x *= .5; + clusterSize[0] = ((uint)screenSize.x + 63) / 64; + clusterSize[1] = ((uint)screenSize.y + 63) / 64; + clusterSize[2] = 16; + uint clusterCount = clusterSize[0] * clusterSize[1] * clusterSize[2]; + { - clusterBuildingCS = 
(ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\LightLimitFix\\ClusterBuildingCS.hlsl", {}, "cs_5_0"); - clusterCullingCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\LightLimitFix\\ClusterCullingCS.hlsl", {}, "cs_5_0"); + std::string clusterSizeStrs[3]; + for (int i = 0; i < 3; ++i) + clusterSizeStrs[i] = std::format("{}", clusterSize[i]); + + std::vector> defines = { + { "CLUSTER_BUILDING_DISPATCH_SIZE_X", clusterSizeStrs[0].c_str() }, + { "CLUSTER_BUILDING_DISPATCH_SIZE_Y", clusterSizeStrs[1].c_str() }, + { "CLUSTER_BUILDING_DISPATCH_SIZE_Z", clusterSizeStrs[2].c_str() } + }; + + clusterBuildingCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\LightLimitFix\\ClusterBuildingCS.hlsl", defines, "cs_5_0"); + clusterCullingCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\LightLimitFix\\ClusterCullingCS.hlsl", defines, "cs_5_0"); lightBuildingCB = new ConstantBuffer(ConstantBufferDesc()); lightCullingCB = new ConstantBuffer(ConstantBufferDesc()); @@ -156,7 +170,7 @@ void LightLimitFix::SetupResources() uavDesc.Buffer.FirstElement = 0; uavDesc.Buffer.Flags = 0; - std::uint32_t numElements = CLUSTER_COUNT; + std::uint32_t numElements = clusterCount; sbDesc.StructureByteStride = sizeof(ClusterAABB); sbDesc.ByteWidth = sizeof(ClusterAABB) * numElements; @@ -175,7 +189,7 @@ void LightLimitFix::SetupResources() uavDesc.Buffer.NumElements = numElements; lightCounter->CreateUAV(uavDesc); - numElements = CLUSTER_COUNT * CLUSTER_MAX_LIGHTS; + numElements = clusterCount * CLUSTER_MAX_LIGHTS; sbDesc.StructureByteStride = sizeof(uint32_t); sbDesc.ByteWidth = sizeof(uint32_t) * numElements; lightList = eastl::make_unique(sbDesc); @@ -184,7 +198,7 @@ void LightLimitFix::SetupResources() uavDesc.Buffer.NumElements = numElements; lightList->CreateUAV(uavDesc); - numElements = CLUSTER_COUNT; + numElements = clusterCount; sbDesc.StructureByteStride = sizeof(LightGrid); sbDesc.ByteWidth = sizeof(LightGrid) * numElements; lightGrid = 
eastl::make_unique(sbDesc); @@ -872,7 +886,7 @@ void LightLimitFix::UpdateLights() context->CSSetUnorderedAccessViews(0, 1, &clusters_uav, nullptr); context->CSSetShader(clusterBuildingCS, nullptr, 0); - context->Dispatch(CLUSTER_SIZE_X, CLUSTER_SIZE_Y, CLUSTER_SIZE_Z); + context->Dispatch(clusterSize[0], clusterSize[1], clusterSize[2]); ID3D11UnorderedAccessView* null_uav = nullptr; context->CSSetUnorderedAccessViews(0, 1, &null_uav, nullptr); @@ -908,7 +922,7 @@ void LightLimitFix::UpdateLights() context->CSSetUnorderedAccessViews(0, 3, uavs, nullptr); context->CSSetShader(clusterCullingCS, nullptr, 0); - context->Dispatch(CLUSTER_SIZE_X / 16, CLUSTER_SIZE_Y / 16, CLUSTER_SIZE_Z / 4); + context->Dispatch((clusterSize[0] + 15) / 16, (clusterSize[1] + 15) / 16, (clusterSize[2] + 3) / 4); } context->CSSetShader(nullptr, nullptr, 0); diff --git a/src/Features/LightLimitFix.h b/src/Features/LightLimitFix.h index 6bc3b3560..7d0acef10 100644 --- a/src/Features/LightLimitFix.h +++ b/src/Features/LightLimitFix.h @@ -74,6 +74,8 @@ struct LightLimitFix : Feature uint EnableLightsVisualisation; uint LightsVisualisationMode; uint pad0; + + uint ClusterSize[4]; }; PerFrame GetCommonBufferData(); @@ -171,6 +173,8 @@ struct LightLimitFix : Feature float lightsNear = 0.0f; float lightsFar = 16384.0f; + uint clusterSize[3] = { 16, 16, 16 }; /* cluster counts along x, y, z */ + Settings settings; using ConfigPair = std::pair;