Skip to content

Commit

Permalink
feat: spatial filter for ssgi (#288)
Browse files Browse the repository at this point in the history
* feat: bring back normal disocclusion

* feat: added spatial denoiser (blur)

* fix: correct frame accumulation

* feat: blur with geometry and normal weights
  • Loading branch information
Pentalimbed authored Apr 19, 2024
1 parent 05ab2be commit 72e4ff6
Show file tree
Hide file tree
Showing 6 changed files with 281 additions and 104 deletions.
73 changes: 73 additions & 0 deletions features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// FAST DENOISING WITH SELF-STABILIZING RECURRENT BLURS
// https://developer.download.nvidia.com/video/gputechconf/gtc/2020/presentations/s22699-fast-denoising-with-self-stabilizing-recurrent-blurs.pdf

#include "../Common/FastMath.hlsli"
#include "../Common/GBuffer.hlsli"
#include "../Common/VR.hlsli"
#include "common.hlsli"

// Input GI to be blurred; read at the thread's own texel and bilinearly sampled at each tap.
Texture2D<lpfloat4> srcGI : register(t0); // maybe half-res
// Temporal accumulation counter stored normalised; main() multiplies the value by 255
// to recover a frame count (only read when TEMPORAL_DENOISER is defined).
Texture2D<unorm float> srcAccumFrames : register(t1); // maybe half-res
// Depth used to reconstruct view-space positions for the geometry weight.
Texture2D<half> srcDepth : register(t2);
// Encoded normals; only .xy is read and passed through DecodeNormal().
Texture2D<half4> srcNormal : register(t3);

// Blurred GI result, written once per thread at its dispatch thread ID.
RWTexture2D<lpfloat4> outGI : register(u0);

// 8-tap sample table for the blur kernel.
// samples = 8, min distance = 0.5, average samples on radius = 2
// Layout per entry: xy = tap offset (all entries lie inside the unit disk and are scaled
// by the blur radius in main()), z = length of that xy offset. main() uses z as the
// tap's initial weight before the geometry and normal weights are applied.
static const float3 g_Poisson8[8] = {
float3(-0.4706069, -0.4427112, +0.6461146),
float3(-0.9057375, +0.3003471, +0.9542373),
float3(-0.3487388, +0.4037880, +0.5335386),
float3(+0.1023042, +0.6439373, +0.6520134),
float3(+0.5699277, +0.3513750, +0.6695386),
float3(+0.2939128, -0.1131226, +0.3149309),
float3(+0.7836658, -0.4208784, +0.8895339),
float3(+0.1564120, -0.8198990, +0.8346850)
};

// Spatial blur pass for the SSGI result.
// Each thread filters one texel of srcGI: the centre texel is taken with weight 1, then
// up to 8 taps from g_Poisson8 are added, each weighted by
//   (tap weight from the table) * (geometry weight) * (normal weight),
// and the weighted sum is normalised and written to outGI.
//
// dtid: dispatch thread ID, used directly as the texel coordinate for reads and the write.
[numthreads(8, 8, 1)] void main(const uint2 dtid : SV_DispatchThreadID) {
	float radius = BlurRadius;
#ifdef TEMPORAL_DENOISER
	// Shrink the kernel as more frames have been accumulated for this texel.
	// srcAccumFrames holds (frame count / 255). Clamp the recovered count to at least one
	// frame so that a zero texel cannot turn the radius into +inf via a division by zero.
	radius /= max(1.0, srcAccumFrames[dtid] * 255);
#endif
	const uint numSamples = 8;

	const float2 uv = (dtid + .5) * RcpFrameDim;
	uint eyeIndex = GET_EYE_IDX(uv);
	const float2 screenPos = ConvertToStereoUV(uv, eyeIndex);

	// Centre texel geometry: view-space position and decoded normal.
	float depth = READ_DEPTH(srcDepth, dtid);
	float3 pos = ScreenToViewPosition(screenPos, depth, eyeIndex);
	float3 normal = DecodeNormal(FULLRES_LOAD(srcNormal, dtid, uv, samplerLinearClamp).xy);

	// The centre texel always contributes with weight 1, so wsum can never be zero.
	lpfloat4 sum = srcGI[dtid];
	// Weights are scalar, so a scalar accumulator is enough; the division below
	// applies it to every channel of sum.
	float wsum = 1;
	for (uint i = 0; i < numSamples; i++) {
		// NOTE(review): the table's z component is the tap's distance from the centre, so
		// farther taps start with a larger weight. Confirm this is intended rather than a
		// falloff function of that distance.
		float w = g_Poisson8[i].z;

		float2 pxOffset = radius * g_Poisson8[i].xy;
		float2 uvOffset = pxOffset * RcpFrameDim;
		float2 uvSample = uv + uvOffset;

		// Skip taps whose eye index differs from the centre texel's.
		if (eyeIndex != GET_EYE_IDX(uvSample))
			continue;

		const float2 screenPosSample = ConvertToStereoUV(uvSample, eyeIndex);
		float depthSample = srcDepth.SampleLevel(samplerLinearClamp, uvSample, 0);
		float3 posSample = ScreenToViewPosition(screenPosSample, depthSample, eyeIndex);

		float3 normalSample = DecodeNormal(srcNormal.SampleLevel(samplerLinearClamp, uvSample, 0).xy);

		// geometry weight: falls off with the tap's distance from the centre texel's
		// tangent plane (offset projected onto the centre normal), scaled by DistanceNormalisation
		w *= saturate(1 - abs(dot(normal, posSample - pos)) * DistanceNormalisation);
		// normal weight: decreases with the angle between the two normals; presumably
		// reaches zero at a quarter of pi, given the division by fsl_HALF_PI and the factor 2
		w *= 1 - saturate(acosFast4(saturate(dot(normalSample, normal))) / fsl_HALF_PI * 2);

		// srcGI may be at a reduced resolution, hence the res_scale on the sample UV.
		lpfloat4 gi = srcGI.SampleLevel(samplerLinearClamp, uvSample * res_scale, 0);

		sum += gi * w;
		wsum += w;
	}

	outGI[dtid] = sum / wsum;
}
5 changes: 4 additions & 1 deletion features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,12 @@ cbuffer SSGICB : register(b1)
float GIStrength;

float DepthDisocclusion;
float NormalDisocclusion;
uint MaxAccumFrames;

float pad;
float BlurRadius;
float DistanceNormalisation;
float2 pad;
};

SamplerState samplerPointClamp : register(s0);
Expand Down
19 changes: 11 additions & 8 deletions features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ Texture2D<lpfloat> srcWorkingDepth : register(t0);
Texture2D<lpfloat4> srcNormal : register(t1);
Texture2D<lpfloat3> srcRadiance : register(t2); // maybe half-res
Texture2D<uint> srcHilbertLUT : register(t3);
Texture2D<uint> srcAccumFrames : register(t4); // maybe half-res
Texture2D<lpfloat4> srcPrevGI : register(t5); // maybe half-res
Texture2D<unorm float> srcAccumFrames : register(t4); // maybe half-res
Texture2D<lpfloat4> srcPrevGI : register(t5); // maybe half-res

RWTexture2D<lpfloat4> outGI : register(u0);
RWTexture2D<unorm float2> outBentNormal : register(u1);
RWTexture2D<half> outPrevDepth : register(u2);
RWTexture2D<half3> outPrevGeo : register(u2);

lpfloat GetDepthFade(lpfloat depth)
{
Expand All @@ -60,7 +60,8 @@ lpfloat2 SpatioTemporalNoise(uint2 pixCoord, uint temporalIndex) // without TAA
uint index = srcHilbertLUT.Load(uint3(pixCoord % 64, 0)).x;
index += 288 * (temporalIndex % 64); // why 288? tried out a few and that's the best so far (with XE_HILBERT_LEVEL 6U) - but there's probably better :)
// R2 sequence - see http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/
return lpfloat2(frac(0.5 + index * float2(0.75487766624669276005, 0.5698402909980532659114)));
// https://www.shadertoy.com/view/mts3zN
return lpfloat2(frac(0.5 + index * float2(0.245122333753, 0.430159709002)));
}

// HBIL pp.29
Expand Down Expand Up @@ -340,14 +341,16 @@ void CalculateGI(
[numthreads(8, 8, 1)] void main(const uint2 dtid
: SV_DispatchThreadID) {
float2 uv = (dtid + .5f) * RcpFrameDim;
uint eyeIndex = GET_EYE_IDX(uv);

float viewspaceZ = READ_DEPTH(srcWorkingDepth, dtid);

outPrevDepth[dtid] = viewspaceZ;

lpfloat2 normalSample = FULLRES_LOAD(srcNormal, dtid, uv, samplerLinearClamp).xy;
lpfloat3 viewspaceNormal = (lpfloat3)DecodeNormal(normalSample);

half2 encodedWorldNormal = EncodeNormal(ViewToWorldVector(viewspaceNormal, InvViewMatrix[eyeIndex]));
outPrevGeo[dtid] = half3(viewspaceZ, encodedWorldNormal);

// Move center pixel slightly towards camera to avoid imprecision artifacts due to depth buffer imprecision; offset depends on depth texture format used
#if USE_HALF_FLOAT_PRECISION == 1
viewspaceZ *= 0.99920h; // this is good for FP16 depth buffer
Expand All @@ -369,9 +372,9 @@ void CalculateGI(
#ifdef TEMPORAL_DENOISER
if (viewspaceZ < DepthFadeRange.y) {
lpfloat4 prevGIAO = srcPrevGI[dtid];
uint accumFrames = srcAccumFrames[dtid];
uint accumFrames = srcAccumFrames[dtid] * 255;

currGIAO = lerp(prevGIAO, currGIAO, fastRcpNR0(accumFrames));
currGIAO = lerp(prevGIAO, currGIAO, rcp(accumFrames));
}
#endif

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ Texture2D<half4> srcDiffuse : register(t0);
Texture2D<half4> srcPrevGI : register(t1); // maybe half-res
Texture2D<half> srcCurrDepth : register(t2);
Texture2D<half4> srcCurrNormal : register(t3);
Texture2D<half> srcPrevDepth : register(t4); // maybe half-res
Texture2D<half3> srcPrevGeo : register(t4); // maybe half-res
Texture2D<float4> srcMotionVec : register(t5);
Texture2D<half4> srcPrevGIAlbedo : register(t6);
Texture2D<unorm float> srcAccumFrames : register(t7);

RWTexture2D<float3> outRadianceDisocc : register(u0);
RWTexture2D<uint> outAccumFrames : register(u1);
RWTexture2D<unorm float> outAccumFrames : register(u1);
RWTexture2D<float4> outRemappedPrevGI : register(u2);

#if (defined(GI) && defined(GI_BOUNCE)) || defined(TEMPORAL_DENOISER)
Expand All @@ -26,7 +27,7 @@ RWTexture2D<float4> outRemappedPrevGI : register(u2);

float2 prev_uv = uv;
#ifdef REPROJECTION
prev_uv += FULLRES_LOAD(srcMotionVec, pixCoord, uv, samplerLinearClamp);
prev_uv += FULLRES_LOAD(srcMotionVec, pixCoord, uv, samplerLinearClamp).xy;
#endif
float2 prev_screen_pos = ConvertToStereoUV(prev_uv, eyeIndex);

Expand All @@ -36,16 +37,23 @@ RWTexture2D<float4> outRemappedPrevGI : register(u2);

#ifdef REPROJECTION
if ((curr_depth <= DepthFadeRange.y) && !(any(prev_screen_pos < 0) || any(prev_screen_pos > 1))) {
float3 curr_normal = DecodeNormal(FULLRES_LOAD(srcCurrNormal, pixCoord, uv, samplerLinearClamp).xy);
curr_normal = ViewToWorldVector(curr_normal, InvViewMatrix[eyeIndex]);
float3 curr_pos = ScreenToViewPosition(screen_pos, curr_depth, eyeIndex);
curr_pos = ViewToWorldPosition(curr_pos, InvViewMatrix[eyeIndex]);

const float prev_depth = srcPrevDepth.SampleLevel(samplerPointClamp, prev_uv * res_scale, 0);
const half3 prev_geo = srcPrevGeo.SampleLevel(samplerPointClamp, prev_uv * res_scale, 0);
const float prev_depth = prev_geo.x;
const float3 prev_normal = DecodeNormal(prev_geo.yz); // prev normal is already world
float3 prev_pos = ScreenToViewPosition(prev_screen_pos, prev_depth, eyeIndex);
prev_pos = ViewToWorldPosition(prev_pos, PrevInvViewMat[eyeIndex]);

float3 delta_pos = curr_pos - prev_pos;
float normal_prod = dot(curr_normal, prev_normal);

bool depth_pass = dot(delta_pos, delta_pos) < DepthDisocclusion * DepthDisocclusion;
valid_history = depth_pass;
bool normal_pass = normal_prod * normal_prod > NormalDisocclusion;
valid_history = depth_pass && normal_pass;
}
#endif

Expand All @@ -66,7 +74,7 @@ RWTexture2D<float4> outRemappedPrevGI : register(u2);

half3 radiance = 0;
#ifdef GI
radiance = FULLRES_LOAD(srcDiffuse, pixCoord, uv, samplerLinearClamp);
radiance = FULLRES_LOAD(srcDiffuse, pixCoord, uv, samplerLinearClamp).rgb;
# ifdef GI_BOUNCE
radiance += prev_gi_albedo.rgb * GIBounceFade;
# endif
Expand All @@ -76,10 +84,10 @@ RWTexture2D<float4> outRemappedPrevGI : register(u2);
#ifdef TEMPORAL_DENOISER
uint accum_frames = 0;
[branch] if (valid_history)
accum_frames = outAccumFrames[pixCoord];
accum_frames = srcAccumFrames.SampleLevel(samplerLinearClamp, prev_uv * res_scale, 0) * 255;
accum_frames = min(accum_frames + 1, MaxAccumFrames);

outAccumFrames[pixCoord] = accum_frames;
outAccumFrames[pixCoord] = accum_frames / 255.0;
outRemappedPrevGI[pixCoord] = prev_gi;
#endif
}
Loading

0 comments on commit 72e4ff6

Please sign in to comment.