Skip to content

Commit

Permalink
v4.7.0:
Browse files Browse the repository at this point in the history
HIGHLIGHTS:

- REBLUR: minor performance optimization
- SIGMA: numerous improvements and bug fixes
- SIGMA: temporal stabilization pass can be disabled, which makes SIGMA more useful for per-light shadow denoising when there are many lights

DETAILS:

- REBLUR: number of steps in "prev-prev" test reduced to 1 (was 2)
- SIGMA: taken kernel from REBLUR
- SIGMA: per pixel rotations replaced with per frame rotations
- SIGMA: tuned random rotators (affects REBLUR too)
- SIGMA: improved "umbra in wide penumbra" behavior
- SIGMA: clarified usage of "SIGMA_BackEnd_UnpackShadow"
- SIGMA: reduced bias
- SIGMA: removed "SigmaSettings::blurRadiusScale"
- SIGMA: exposed "SigmaSettings::lightDirection"
- SIGMA: fixed mismatched behavior between opaque and translucent shadows
- SIGMA: anisotropic filtering support (currently only for directional light sources)
- SIGMA: reduced flickering in areas with a small blur radius (if TS is on)
- SIGMA: exposed "stabilizationStrength" (TS pass is disabled if 0)
- SIGMA: reduced umbra blurring when in wide penumbra
- NRD INTEGRATION: bug fixes for debug logging
- Updated deps
- Updated docs
  • Loading branch information
dzhdanNV committed Apr 15, 2024
1 parent db4f66f commit f20158d
Show file tree
Hide file tree
Showing 23 changed files with 239 additions and 189 deletions.
2 changes: 1 addition & 1 deletion External/MathLib
Submodule MathLib updated 1 files
+1 −1 External/sse2neon
6 changes: 3 additions & 3 deletions Include/NRD.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#include <cstddef>

#define NRD_VERSION_MAJOR 4
#define NRD_VERSION_MINOR 6
#define NRD_VERSION_BUILD 1
#define NRD_VERSION_DATE "25 March 2024"
#define NRD_VERSION_MINOR 7
#define NRD_VERSION_BUILD 0
#define NRD_VERSION_DATE "17 April 2024"

#if defined(_MSC_VER)
#define NRD_CALL __fastcall
Expand Down
2 changes: 1 addition & 1 deletion Include/NRDDescs.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#pragma once

#define NRD_DESCS_VERSION_MAJOR 4
#define NRD_DESCS_VERSION_MINOR 6
#define NRD_DESCS_VERSION_MINOR 7

static_assert(NRD_VERSION_MAJOR == NRD_DESCS_VERSION_MAJOR && NRD_VERSION_MINOR == NRD_DESCS_VERSION_MINOR, "Please, update all NRD SDK files");

Expand Down
12 changes: 9 additions & 3 deletions Include/NRDSettings.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#pragma once

#define NRD_SETTINGS_VERSION_MAJOR 4
#define NRD_SETTINGS_VERSION_MINOR 6
#define NRD_SETTINGS_VERSION_MINOR 7

static_assert(NRD_VERSION_MAJOR == NRD_SETTINGS_VERSION_MAJOR && NRD_VERSION_MINOR == NRD_SETTINGS_VERSION_MINOR, "Please, update all NRD SDK files");

Expand Down Expand Up @@ -223,6 +223,7 @@ namespace nrd
float responsiveAccumulationRoughnessThreshold = 0.0f;

// (normalized %) - stabilizes output, more stabilization improves antilag (clean signals can use lower values)
// 0 - disables the stabilization pass
float stabilizationStrength = 1.0f;

// (normalized %) - represents maximum allowed deviation from local tangent plane
Expand Down Expand Up @@ -260,11 +261,16 @@ namespace nrd

struct SigmaSettings
{
// Direction to the light source
// IMPORTANT: it is needed only for directional light sources (sun)
float lightDirection[3] = {0.0f, 0.0f, 0.0f};

// (normalized %) - represents maximum allowed deviation from local tangent plane
float planeDistanceSensitivity = 0.005f;

// [1; 3] - adds bias and stability if > 1
float blurRadiusScale = 2.0f;
// (normalized %) - stabilizes output, more stabilization improves antilag (clean signals can use lower values)
// 0 - disables the stabilization pass and makes denoising spatial only (no history)
float stabilizationStrength = 1.0f;
};

// RELAX
Expand Down
4 changes: 2 additions & 2 deletions Integration/NRDIntegration.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#include <map>

#define NRD_INTEGRATION_MAJOR 1
#define NRD_INTEGRATION_MINOR 11
#define NRD_INTEGRATION_DATE "19 March 2024"
#define NRD_INTEGRATION_MINOR 12
#define NRD_INTEGRATION_DATE "17 April 2024"
#define NRD_INTEGRATION 1

#define NRD_INTEGRATION_DEBUG_LOGGING 0
Expand Down
11 changes: 3 additions & 8 deletions Integration/NRDIntegration.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ void NrdIntegration::CreateResources(uint16_t resourceWidth, uint16_t resourceHe
m_TransientPoolSize += memoryDesc.size;

#if( NRD_INTEGRATION_DEBUG_LOGGING == 1 )
printf("%s %ux%u format=%u mips=%u\n", name, nrdTextureDesc.width, nrdTextureDesc.height, nrdTextureDesc.format, nrdTextureDesc.mipNum);
printf("%s format=%u downsampleFactor=%u\n", name, nrdTextureDesc.format, nrdTextureDesc.downsampleFactor);
#endif
}

Expand Down Expand Up @@ -403,7 +403,7 @@ void NrdIntegration::NewFrame()
NRD_INTEGRATION_ASSERT(m_Instance, "Uninitialized! Did you forget to call 'Initialize'?");

#if( NRD_INTEGRATION_DEBUG_LOGGING == 1 )
printf("%s (frame %u) ==============================================================================\n\n", m_Name, frameIndex);
printf("%s (frame %u) ==============================================================================\n\n", m_Name, m_FrameIndex);
#endif

m_DescriptorPoolIndex = m_FrameIndex % m_BufferedFramesNum;
Expand Down Expand Up @@ -616,12 +616,7 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor
if( r.type == nrd::ResourceType::PERMANENT_POOL )
printf("P(%u) ", r.indexInPool);
else if( r.type == nrd::ResourceType::TRANSIENT_POOL )
{
if (r.mipNum != 1 || r.mipOffset != 0)
printf("T(%u)[%u:%u] ", r.indexInPool, r.mipOffset, r.mipNum);
else
printf("T(%u) ", r.indexInPool);
}
printf("T(%u) ", r.indexInPool);
else
{
const char* s = nrd::GetResourceTypeString(r.type);
Expand Down
19 changes: 11 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# NVIDIA REAL-TIME DENOISERS v4.6.1 (NRD)
# NVIDIA REAL-TIME DENOISERS v4.7.0 (NRD)

[![Build NRD SDK](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml/badge.svg)](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml)

Expand All @@ -18,7 +18,8 @@ For quick starting see *[NRD sample](https://github.com/NVIDIAGameWorks/NRDSampl
Performance on RTX 4080 @ 1440p (native resolution, default denoiser settings):
- `REBLUR_DIFFUSE_SPECULAR` - 2.45 ms
- `RELAX_DIFFUSE_SPECULAR` - 2.90 ms
- `SIGMA_SHADOW` - 0.30 ms
- `SIGMA_SHADOW` - 0.30 ms (0.24 ms if temporal stabilization is off)
- `SIGMA_SHADOW_TRANSLUCENCY` - 0.40 ms (0.30 ms if temporal stabilization is off)

Supported signal types:
- *RELAX*:
Expand Down Expand Up @@ -589,7 +590,7 @@ Denoising is not a panacea or miracle. Denoising works best with ray tracing res
float3 preintegratedBRDF = PreintegratedBRDF( Rf0, N, V, roughness )
Denoising( specularRadiance * BRDF ) → NRD( specularRadiance * BRDF / preintegratedBRDF ) * preintegratedBRDF

A good approximation for pre-integrated specular BRDF can be found *[here](https://github.com/NVIDIAGameWorks/Falcor/blob/056f7b7c73b69fa8140d211bbf683ddf297a2ae0/Source/Falcor/Rendering/Materials/Microfacet.slang#L213)*.
A good approximation for pre-integrated specular BRDF can be found *[here](https://github.com/NVIDIAGameWorks/MathLib/blob/407ecd0d1892d12ee1ec98c3d46cbeed73b79a0d/STL.hlsli#L2147)*. Pre-integrated specular BRDF can also be referenced as "specular albedo" or "environment BRDF".

## COMBINED DENOISING OF DIRECT AND INDIRECT LIGHTING

Expand Down Expand Up @@ -710,7 +711,7 @@ Hair strands tangent vectors *can't* be used as "normals guide" for *NRD* due to
**[NRD]** Hit distances should come from an importance sampling method. But if denoising of AO/SO is needed, AO/SO can come from cos-weighted (or VNDF) sampling in a tradeoff of IQ.
**[NRD]** Low discrepancy sampling (blue noise) helps to have more stable output in 0.5-1 rpp mode. It's a must for REBLUR-based Ambient and Specular Occlusion denoisers and SIGMA.
**[NRD]** Low discrepancy sampling (blue noise) helps to get more stable output in 0.5-1 rpp mode. It's a must for REBLUR-based Ambient and Specular Occlusion denoisers and SIGMA.
**[NRD]** It's recommended to set `CommonSettings::accumulationMode` to `RESET` for a single frame, if a history reset is needed. If history buffers are recreated or contain garbage, it's recommended to use `CLEAR_AND_RESET` for a single frame. `CLEAR_AND_RESET` is not free because clearing is done in a compute shader. Render target clears on the application side should be prioritized over this solution.
Expand Down Expand Up @@ -742,13 +743,11 @@ maxAccumulatedFrameNum > maxFastAccumulatedFrameNum > historyFixFrameNum
**[REBLUR]** *REBLUR* expects hit distances in a normalized form. To avoid mismatching, `REBLUR_FrontEnd_GetNormHitDist` must be used for normalization. Normalization parameters should be passed into *NRD* as `HitDistanceParameters` for internal hit distance denormalization. Some tweaking can be needed here, but in most cases default `HitDistanceParameters` works well. *REBLUR* outputs denoised normalized hit distance, which can be used by the application as ambient or specular occlusion (AO & SO) (see unpacking functions from `NRD.hlsli`).
**[REBLUR]** Intensity antilag parameters need to be carefully tuned. The defaults are good but `AntilagIntensitySettings::sensitivityToDarkness` needs to be tuned for a given HDR range. Initial integration should work with intensity antilag turned off.
**[REBLUR]** Even if antilag is off, it's recommended to tune `AntilagIntensitySettings::sensitivityToDarkness`, because it is used for error estimation.
**[REBLUR/RELAX]** Antilag parameters need to be carefully tuned. Initial integration should be done with disabled antilag.
**[RELAX]** *RELAX* works well with signals produced by *RTXDI* or very clean high RPP signals. The Sweet Home of *RELAX* is *RTXDI* sample. Please, consider getting familiar with this application.
**[SIGMA]** Using "blue" noise can help to avoid shadow shimmering, it works best if the pattern is static on the screen. Additionally, `blurRadiusScale` can be set to `2-4` to mitigate such problems in complicated cases.
**[SIGMA]** Using "blue" noise can help to avoid shadow shimmering. It works best if the pattern is static on the screen.
**[SIGMA]** *SIGMA_TRANSLUCENT_SHADOW* can be used for shadow denoising from multiple light sources:
Expand Down Expand Up @@ -798,3 +797,7 @@ Is this a biased solution? If spatial filtering is off - no, because we just reo
- if shadows overlap, a separate pass is needed to analyze noisy input and classify pixels as *umbra* - *penumbra* (and optionally *empty space*). Raster shadow maps can be used for this if available
- it is not recommended to mix 1 cd and 100000 cd lights, since FP32 texture will be needed for a weighted sum.
In this case, it's better to process the sun and other bright light sources separately.

**[SIGMA]** *SIGMA* can be used for multi-light shadow denoising if applied "per light". `SigmaSettings::stabilizationStrength` can be set to `0` to disable temporal history. It provides the following benefits:
- light count independent memory usage
- no need to manage history buffers for lights
4 changes: 2 additions & 2 deletions Resources/Version.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Versioning rules:
*/

#define VERSION_MAJOR 4
#define VERSION_MINOR 6
#define VERSION_BUILD 1
#define VERSION_MINOR 7
#define VERSION_BUILD 0

#define VERSION_STRING STR(VERSION_MAJOR.VERSION_MINOR.VERSION_BUILD encoding=NRD_NORMAL_ENCODING.NRD_ROUGHNESS_ENCODING)
28 changes: 28 additions & 0 deletions Shaders/Include/Common.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,34 @@ Usage:
printf(__VA_ARGS__)
#endif

//==================================================================================================================
// KERNELS
//==================================================================================================================

// 6-entry kernel table: the first three entries lie on the unit circle ( 120 degrees apart ),
// the last three lie on a circle of radius 0.3 ( also 120 degrees apart ).
// In every entry ".z" equals the length of ".xy".
// NOTE(review): ".xy" is presumably a 2D sample offset and ".z" its radius / weight - confirm at the use sites
static const float3 g_Special6[ 6 ] =
{
// https://www.desmos.com/calculator/e5mttzlg6v
float3( -0.50 * sqrt( 3.0 ) , -0.50 , 1.0 ),
float3( 0.00 , 1.00 , 1.0 ),
float3( 0.50 * sqrt( 3.0 ) , -0.50 , 1.0 ),
float3( 0.00 , -0.30 , 0.3 ),
float3( 0.15 * sqrt( 3.0 ) , 0.15 , 0.3 ),
float3( -0.15 * sqrt( 3.0 ) , 0.15 , 0.3 ),
};

// 8-entry kernel table: the first four entries lie on the unit circle along the +/- X and +/- Y axes,
// the last four lie on the diagonals at radius 0.5 ( 0.25 * sqrt( 2.0 ) per component ).
// In every entry ".z" equals the length of ".xy".
// NOTE(review): ".xy" is presumably a 2D sample offset and ".z" its radius / weight - confirm at the use sites
static const float3 g_Special8[ 8 ] =
{
// https://www.desmos.com/calculator/abaqyvswem
float3( -1.00 , 0.00 , 1.0 ),
float3( 0.00 , 1.00 , 1.0 ),
float3( 1.00 , 0.00 , 1.0 ),
float3( 0.00 , -1.00 , 1.0 ),
float3( -0.25 * sqrt( 2.0 ) , 0.25 * sqrt( 2.0 ) , 0.5 ),
float3( 0.25 * sqrt( 2.0 ) , 0.25 * sqrt( 2.0 ) , 0.5 ),
float3( 0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 ),
float3( -0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 )
};

//==================================================================================================================
// SHARED FUNCTIONS
//==================================================================================================================
Expand Down
25 changes: 10 additions & 15 deletions Shaders/Include/NRD.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.
*/

// NRD v4.6
// NRD v4.7

//=================================================================================================================================
// INPUT PARAMETERS
Expand Down Expand Up @@ -56,7 +56,7 @@ float distanceToOccluder:
- distance to occluder, must follow the rules:
- NoL <= 0 - 0 ( it's very important )
- NoL > 0 ( hit ) - hit distance
- NoL > 0 ( miss ) - NRD_FP16_MAX
- NoL > 0 ( miss ) - >= NRD_FP16_MAX
float tanOfLightAngularRadius:
- tan( lightAngularSize * 0.5 )
Expand Down Expand Up @@ -747,12 +747,12 @@ float2 SIGMA_FrontEnd_PackShadow( float viewZ, float distanceToOccluder, float t
r.x = 0.0;
r.y = _NRD_PackViewZ( viewZ );

if( distanceToOccluder == NRD_FP16_MAX )
if( distanceToOccluder >= NRD_FP16_MAX )
r.x = NRD_FP16_MAX;
else if( distanceToOccluder != 0.0 )
{
float distanceToOccluderProj = distanceToOccluder * tanOfLightAngularRadius;
r.x = min( distanceToOccluderProj, 32768.0 );
float penumbraRadius = distanceToOccluder * tanOfLightAngularRadius;
r.x = min( penumbraRadius, 32768.0 );
}

return r;
Expand All @@ -762,7 +762,7 @@ float2 SIGMA_FrontEnd_PackShadow( float viewZ, float distanceToOccluder, float t
float2 SIGMA_FrontEnd_PackShadow( float viewZ, float distanceToOccluder, float tanOfLightAngularRadius, float3 translucency, out float4 out2 )
{
// IN_SHADOW_TRANSLUCENCY
out2.x = float( distanceToOccluder == NRD_FP16_MAX );
out2.x = float( distanceToOccluder >= NRD_FP16_MAX );
out2.yzw = saturate( translucency );

// IN_SHADOWDATA
Expand Down Expand Up @@ -879,16 +879,11 @@ NRD_SG RELAX_BackEnd_UnpackSh( float4 sh0, float4 sh1 )
//=================================================================================================================================

// OUT_SHADOW_TRANSLUCENCY => X
// SIGMA_SHADOW:
// float shadowData = SIGMA_BackEnd_UnpackShadow( shadowData );
// shadow = shadowData;
// SIGMA_SHADOW / SIGMA_SHADOW_TRANSLUCENCY:
// float shadow = SIGMA_BackEnd_UnpackShadow( OUT_SHADOW_TRANSLUCENCY );
// SIGMA_SHADOW_TRANSLUCENCY:
// float4 shadowData = SIGMA_BackEnd_UnpackShadow( shadowData );
// float3 finalShadowCommon = lerp( shadowData.yzw, 1.0, shadowData.x ); // or
// float3 finalShadowExotic = shadowData.yzw * shadowData.x; // or
// float3 finalShadowMoreExotic = shadowData.yzw;
// IMPORTANT: use "^ 3" to compensate over-blurring ( it really makes the result closer to the reference )
#define SIGMA_BackEnd_UnpackShadow( color ) ( color * color * color )
// float3 translucentShadow = SIGMA_BackEnd_UnpackShadow( OUT_SHADOW_TRANSLUCENCY ).yzw;
// Unpacks a denoised SIGMA output value by squaring it.
// The argument is parenthesized, so a compound expression ( e.g. "a + b" ) is squared as a whole
// instead of being broken apart by operator precedence after macro expansion.
// NOTE: the argument is expanded twice - avoid passing expressions with side effects
#define SIGMA_BackEnd_UnpackShadow( shadow ) ( ( shadow ) * ( shadow ) )

//=================================================================================================================================
// BACK-END - HIGH QUALITY RESOLVE
Expand Down
29 changes: 3 additions & 26 deletions Shaders/Include/REBLUR_Common.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,6 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#define REBLUR_BLUR 1
#define REBLUR_POST_BLUR 2

// Kernels
static const float3 g_Special6[ 6 ] =
{
// https://www.desmos.com/calculator/e5mttzlg6v
float3( -0.50 * sqrt( 3.0 ) , -0.50 , 1.0 ),
float3( 0.00 , 1.00 , 1.0 ),
float3( 0.50 * sqrt( 3.0 ) , -0.50 , 1.0 ),
float3( 0.00 , -0.30 , 0.3 ),
float3( 0.15 * sqrt( 3.0 ) , 0.15 , 0.3 ),
float3( -0.15 * sqrt( 3.0 ) , 0.15 , 0.3 ),
};

static const float3 g_Special8[ 8 ] =
{
// https://www.desmos.com/calculator/abaqyvswem
float3( -1.00 , 0.00 , 1.0 ),
float3( 0.00 , 1.00 , 1.0 ),
float3( 1.00 , 0.00 , 1.0 ),
float3( 0.00 , -1.00 , 1.0 ),
float3( -0.25 * sqrt( 2.0 ) , 0.25 * sqrt( 2.0 ) , 0.5 ),
float3( 0.25 * sqrt( 2.0 ) , 0.25 * sqrt( 2.0 ) , 0.5 ),
float3( 0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 ),
float3( -0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 )
};

// Storage

#define REBLUR_MAX_ACCUM_FRAME_NUM 63.0
Expand Down Expand Up @@ -360,7 +335,9 @@ float2x3 GetKernelBasis( float3 D, float3 N, float NoD, float roughness = 1.0, f
B = cross( R, T );

float skewFactor = lerp( 0.5 + 0.5 * roughness, 1.0, NoD );
T *= lerp( skewFactor, 1.0, anisoFade );
skewFactor = lerp( skewFactor, 1.0, anisoFade );

T *= skewFactor; // TODO: B /= skewFactor?
}

return float2x3( T, B );
Expand Down
2 changes: 1 addition & 1 deletion Shaders/Include/REBLUR_Config.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#define REBLUR_HIT_DIST_MIN_WEIGHT( smc ) ( 0.1 * smc ) // was 0.1

#define REBLUR_MAX_PERCENT_OF_LOBE_VOLUME 0.75
#define REBLUR_VIRTUAL_MOTION_PREV_PREV_WEIGHT_ITERATION_NUM 2
#define REBLUR_VIRTUAL_MOTION_PREV_PREV_WEIGHT_ITERATION_NUM 1
#define REBLUR_COLOR_CLAMPING_SIGMA_SCALE 2.0 // using smaller values leads to bias if camera rotates slowly due to reprojection instabilities
#define REBLUR_FIREFLY_SUPPRESSOR_MAX_RELATIVE_INTENSITY float2( 10.0, 1.1 )
#define REBLUR_FIREFLY_SUPPRESSOR_RADIUS_SCALE 0.1
Expand Down
Loading

0 comments on commit f20158d

Please sign in to comment.