feat: spatial filter for ssgi (#288)

* feat: bring back normal disocclusion * feat: added spatial denoiser (blur) * fix: correct frame accumulation * feat: blur with geometry and normal weights
doodlum · Apr 19, 2024 · 72e4ff6 · 72e4ff6
1 parent 05ab2be
commit 72e4ff6
Show file tree

Hide file tree

Showing 6 changed files with 281 additions and 104 deletions.
diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl
@@ -0,0 +1,73 @@
+// FAST DENOISING WITH SELF-STABILIZING RECURRENT BLURS
+// 	https://developer.download.nvidia.com/video/gputechconf/gtc/2020/presentations/s22699-fast-denoising-with-self-stabilizing-recurrent-blurs.pdf
+
+#include "../Common/FastMath.hlsli"
+#include "../Common/GBuffer.hlsli"
+#include "../Common/VR.hlsli"
+#include "common.hlsli"
+
+// Inputs of the blur pass.
+// srcGI: GI to be filtered; main loads the centre texel directly and bilinearly samples the taps.
+Texture2D<lpfloat4> srcGI : register(t0);              // maybe half-res
+// srcAccumFrames: unorm value that main multiplies by 255 before using it as a frame count
+// (only read when TEMPORAL_DENOISER is defined).
+Texture2D<unorm float> srcAccumFrames : register(t1);  // maybe half-res
+// srcDepth: read through READ_DEPTH at the centre pixel and sampled with samplerLinearClamp at the taps.
+Texture2D<half> srcDepth : register(t2);
+// srcNormal: only .xy is read; it is passed to DecodeNormal.
+Texture2D<half4> srcNormal : register(t3);
+
+// Output: one filtered GI value is written per thread at dtid.
+RWTexture2D<lpfloat4> outGI : register(u0);
+
+// Poisson-disk tap table used by the blur kernel in main.
+// samples = 8, min distance = 0.5, average samples on radius = 2
+//   .xy : tap offset on the disk; main multiplies it by the blur radius to get a pixel offset.
+//   .z  : length of .xy; main uses it as the tap's starting weight before the geometry and
+//         normal terms are applied.
+static const float3 g_Poisson8[8] = {
+	float3(-0.4706069, -0.4427112, +0.6461146),
+	float3(-0.9057375, +0.3003471, +0.9542373),
+	float3(-0.3487388, +0.4037880, +0.5335386),
+	float3(+0.1023042, +0.6439373, +0.6520134),
+	float3(+0.5699277, +0.3513750, +0.6695386),
+	float3(+0.2939128, -0.1131226, +0.3149309),
+	float3(+0.7836658, -0.4208784, +0.8895339),
+	float3(+0.1564120, -0.8198990, +0.8346850)
+};
+
+// Spatial blur pass over the SSGI result; each thread filters one pixel and writes it to outGI.
+//
+// The centre texel enters the average with weight 1. Each of the 8 Poisson taps starts from its
+// table weight (g_Poisson8[i].z) and is attenuated by:
+//   - a geometry term: distance of the tap's view-space position from the plane through the
+//     centre position with the centre normal, scaled by DistanceNormalisation;
+//   - a normal term: falls off linearly with the angle between the two normals and reaches 0
+//     at half of fsl_HALF_PI (45 degrees, assuming fsl_HALF_PI is pi/2 as its name suggests).
+// Taps whose UV resolves to a different eye index than the centre pixel are skipped.
+// With TEMPORAL_DENOISER defined, the radius is divided by the accumulated frame count, so a
+// pixel with more accumulated frames gets a smaller blur radius.
+[numthreads(8, 8, 1)] void main(const uint2 dtid : SV_DispatchThreadID) {
+	float radius = BlurRadius;
+#ifdef TEMPORAL_DENOISER
+	// The texture is unorm, so * 255 turns the stored value back into a frame count.
+	// Clamp to at least 1: a stored count of 0 would otherwise divide by zero and yield an
+	// infinite radius (inf/NaN tap coordinates).
+	radius /= max(srcAccumFrames[dtid] * 255, 1);
+#endif
+	const uint numSamples = 8;
+
+	const float2 uv = (dtid + .5) * RcpFrameDim;
+	uint eyeIndex = GET_EYE_IDX(uv);
+	const float2 screenPos = ConvertToStereoUV(uv, eyeIndex);
+
+	// Centre pixel geometry, used as the reference for both weights below.
+	float depth = READ_DEPTH(srcDepth, dtid);
+	float3 pos = ScreenToViewPosition(screenPos, depth, eyeIndex);
+	float3 normal = DecodeNormal(FULLRES_LOAD(srcNormal, dtid, uv, samplerLinearClamp).xy);
+
+	lpfloat4 sum = srcGI[dtid];
+	float wsum = 1;  // scalar: every GI channel shares the same weight
+	for (uint i = 0; i < numSamples; i++) {
+		float w = g_Poisson8[i].z;
+
+		float2 pxOffset = radius * g_Poisson8[i].xy;
+		float2 uvOffset = pxOffset * RcpFrameDim;
+		float2 uvSample = uv + uvOffset;
+
+		// Do not blend across eyes: skip taps that land in the other eye's region.
+		if (eyeIndex != GET_EYE_IDX(uvSample))
+			continue;
+
+		const float2 screenPosSample = ConvertToStereoUV(uvSample, eyeIndex);
+		float depthSample = srcDepth.SampleLevel(samplerLinearClamp, uvSample, 0);
+		float3 posSample = ScreenToViewPosition(screenPosSample, depthSample, eyeIndex);
+
+		float3 normalSample = DecodeNormal(srcNormal.SampleLevel(samplerLinearClamp, uvSample, 0).xy);
+
+		// geometry weight
+		w *= saturate(1 - abs(dot(normal, posSample - pos)) * DistanceNormalisation);
+		// normal weight
+		w *= 1 - saturate(acosFast4(saturate(dot(normalSample, normal))) / fsl_HALF_PI * 2);
+
+		// NOTE(review): only the GI fetch scales the UV by res_scale; presumably this maps the
+		// UV into the (maybe half-res) GI texture - confirm against common.hlsli.
+		lpfloat4 gi = srcGI.SampleLevel(samplerLinearClamp, uvSample * res_scale, 0);
+
+		sum += gi * w;
+		wsum += w;
+	}
+
+	// wsum >= 1 because of the centre texel, so this division is always safe.
+	outGI[dtid] = sum / wsum;
+}
diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli b/features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli
@@ -79,9 +79,12 @@ cbuffer SSGICB : register(b1)
 	float GIStrength;
 
 	float DepthDisocclusion;
+	float NormalDisocclusion;
 	uint MaxAccumFrames;
 
-	float pad;
+	float BlurRadius;
+	float DistanceNormalisation;
+	float2 pad;
 };
 
 SamplerState samplerPointClamp : register(s0);

diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl
@@ -41,12 +41,12 @@ Texture2D<lpfloat> srcWorkingDepth : register(t0);
 Texture2D<lpfloat4> srcNormal : register(t1);
 Texture2D<lpfloat3> srcRadiance : register(t2);  // maybe half-res
 Texture2D<uint> srcHilbertLUT : register(t3);
-Texture2D<uint> srcAccumFrames : register(t4);  // maybe half-res
-Texture2D<lpfloat4> srcPrevGI : register(t5);   // maybe half-res
+Texture2D<unorm float> srcAccumFrames : register(t4);  // maybe half-res
+Texture2D<lpfloat4> srcPrevGI : register(t5);          // maybe half-res
 
 RWTexture2D<lpfloat4> outGI : register(u0);
 RWTexture2D<unorm float2> outBentNormal : register(u1);
-RWTexture2D<half> outPrevDepth : register(u2);
+RWTexture2D<half3> outPrevGeo : register(u2);
 
 lpfloat GetDepthFade(lpfloat depth)
 {
@@ -60,7 +60,8 @@ lpfloat2 SpatioTemporalNoise(uint2 pixCoord, uint temporalIndex)  // without TAA
 	uint index = srcHilbertLUT.Load(uint3(pixCoord % 64, 0)).x;
 	index += 288 * (temporalIndex % 64);  // why 288? tried out a few and that's the best so far (with XE_HILBERT_LEVEL 6U) - but there's probably better :)
 	// R2 sequence - see http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/
-	return lpfloat2(frac(0.5 + index * float2(0.75487766624669276005, 0.5698402909980532659114)));
+	// https://www.shadertoy.com/view/mts3zN
+	return lpfloat2(frac(0.5 + index * float2(0.245122333753, 0.430159709002)));
 }
 
 // HBIL pp.29
@@ -340,14 +341,16 @@ void CalculateGI(
 [numthreads(8, 8, 1)] void main(const uint2 dtid
 								: SV_DispatchThreadID) {
 	float2 uv = (dtid + .5f) * RcpFrameDim;
+	uint eyeIndex = GET_EYE_IDX(uv);
 
 	float viewspaceZ = READ_DEPTH(srcWorkingDepth, dtid);
 
-	outPrevDepth[dtid] = viewspaceZ;
-
 	lpfloat2 normalSample = FULLRES_LOAD(srcNormal, dtid, uv, samplerLinearClamp).xy;
 	lpfloat3 viewspaceNormal = (lpfloat3)DecodeNormal(normalSample);
 
+	half2 encodedWorldNormal = EncodeNormal(ViewToWorldVector(viewspaceNormal, InvViewMatrix[eyeIndex]));
+	outPrevGeo[dtid] = half3(viewspaceZ, encodedWorldNormal);
+
 // Move center pixel slightly towards camera to avoid imprecision artifacts due to depth buffer imprecision; offset depends on depth texture format used
 #if USE_HALF_FLOAT_PRECISION == 1
 	viewspaceZ *= 0.99920h;  // this is good for FP16 depth buffer
@@ -369,9 +372,9 @@ void CalculateGI(
 #ifdef TEMPORAL_DENOISER
 	if (viewspaceZ < DepthFadeRange.y) {
 		lpfloat4 prevGIAO = srcPrevGI[dtid];
-		uint accumFrames = srcAccumFrames[dtid];
+		uint accumFrames = srcAccumFrames[dtid] * 255;
 
-		currGIAO = lerp(prevGIAO, currGIAO, fastRcpNR0(accumFrames));
+		currGIAO = lerp(prevGIAO, currGIAO, rcp(accumFrames));
 	}
 #endif
 

diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl
@@ -6,12 +6,13 @@ Texture2D<half4> srcDiffuse : register(t0);
 Texture2D<half4> srcPrevGI : register(t1);  // maybe half-res
 Texture2D<half> srcCurrDepth : register(t2);
 Texture2D<half4> srcCurrNormal : register(t3);
-Texture2D<half> srcPrevDepth : register(t4);  // maybe half-res
+Texture2D<half3> srcPrevGeo : register(t4);  // maybe half-res
 Texture2D<float4> srcMotionVec : register(t5);
 Texture2D<half4> srcPrevGIAlbedo : register(t6);
+Texture2D<unorm float> srcAccumFrames : register(t7);
 
 RWTexture2D<float3> outRadianceDisocc : register(u0);
-RWTexture2D<uint> outAccumFrames : register(u1);
+RWTexture2D<unorm float> outAccumFrames : register(u1);
 RWTexture2D<float4> outRemappedPrevGI : register(u2);
 
 #if (defined(GI) && defined(GI_BOUNCE)) || defined(TEMPORAL_DENOISER)
@@ -26,7 +27,7 @@ RWTexture2D<float4> outRemappedPrevGI : register(u2);
 
 	float2 prev_uv = uv;
 #ifdef REPROJECTION
-	prev_uv += FULLRES_LOAD(srcMotionVec, pixCoord, uv, samplerLinearClamp);
+	prev_uv += FULLRES_LOAD(srcMotionVec, pixCoord, uv, samplerLinearClamp).xy;
 #endif
 	float2 prev_screen_pos = ConvertToStereoUV(prev_uv, eyeIndex);
 
@@ -36,16 +37,23 @@ RWTexture2D<float4> outRemappedPrevGI : register(u2);
 
 #ifdef REPROJECTION
 	if ((curr_depth <= DepthFadeRange.y) && !(any(prev_screen_pos < 0) || any(prev_screen_pos > 1))) {
+		float3 curr_normal = DecodeNormal(FULLRES_LOAD(srcCurrNormal, pixCoord, uv, samplerLinearClamp).xy);
+		curr_normal = ViewToWorldVector(curr_normal, InvViewMatrix[eyeIndex]);
 		float3 curr_pos = ScreenToViewPosition(screen_pos, curr_depth, eyeIndex);
 		curr_pos = ViewToWorldPosition(curr_pos, InvViewMatrix[eyeIndex]);
 
-		const float prev_depth = srcPrevDepth.SampleLevel(samplerPointClamp, prev_uv * res_scale, 0);
+		const half3 prev_geo = srcPrevGeo.SampleLevel(samplerPointClamp, prev_uv * res_scale, 0);
+		const float prev_depth = prev_geo.x;
+		const float3 prev_normal = DecodeNormal(prev_geo.yz);  // prev normal is already world
 		float3 prev_pos = ScreenToViewPosition(prev_screen_pos, prev_depth, eyeIndex);
 		prev_pos = ViewToWorldPosition(prev_pos, PrevInvViewMat[eyeIndex]);
 
 		float3 delta_pos = curr_pos - prev_pos;
+		float normal_prod = dot(curr_normal, prev_normal);
+
 		bool depth_pass = dot(delta_pos, delta_pos) < DepthDisocclusion * DepthDisocclusion;
-		valid_history = depth_pass;
+		bool normal_pass = normal_prod * normal_prod > NormalDisocclusion;
+		valid_history = depth_pass && normal_pass;
 	}
 #endif
 
@@ -66,7 +74,7 @@ RWTexture2D<float4> outRemappedPrevGI : register(u2);
 
 	half3 radiance = 0;
 #ifdef GI
-	radiance = FULLRES_LOAD(srcDiffuse, pixCoord, uv, samplerLinearClamp);
+	radiance = FULLRES_LOAD(srcDiffuse, pixCoord, uv, samplerLinearClamp).rgb;
 #	ifdef GI_BOUNCE
 	radiance += prev_gi_albedo.rgb * GIBounceFade;
 #	endif
@@ -76,10 +84,10 @@ RWTexture2D<float4> outRemappedPrevGI : register(u2);
 #ifdef TEMPORAL_DENOISER
 	uint accum_frames = 0;
 	[branch] if (valid_history)
-		accum_frames = outAccumFrames[pixCoord];
+		accum_frames = srcAccumFrames.SampleLevel(samplerLinearClamp, prev_uv * res_scale, 0) * 255;
 	accum_frames = min(accum_frames + 1, MaxAccumFrames);
 
-	outAccumFrames[pixCoord] = accum_frames;
+	outAccumFrames[pixCoord] = accum_frames / 255.0;
 	outRemappedPrevGI[pixCoord] = prev_gi;
 #endif
 }