From f20158d5a35e0ceefde3564dbfa1108c460e94a3 Mon Sep 17 00:00:00 2001 From: dzhdan Date: Mon, 15 Apr 2024 17:01:50 +0800 Subject: [PATCH] v4.7.0: HIGHLIGHTS: - REBLUR: minor performance optimization - SIGMA: numerous improvements and bug fixes - SIGMA: temporal stabilization pass can be disabled, what makes SIGMA more useful for per-light shadow denoising if lights are many DETAILS: - REBLUR: number of steps in "prev-prev" test reduced to 1 (was 2) - SIGMA: taken kernel from REBLUR - SIGMA: per pixel rotations replaced with per frame rotations - SIGMA: tuned random rotators (affects REBLUR too) - SIGMA: improved "umbra in wide penumbra" behavior - SIGMA: clarified usage of "SIGMA_BackEnd_UnpackShadow" - SIGMA: reduced bias - SIGMA: removed "SigmaSettings::blurRadiusScale" - SIGMA: exposed "SigmaSettings::lightDirection" - SIGMA: fixed mismatched behavior between opaque and translucent shadows - SIGMA: anisotropic filtering support (currently only for directional light sources) - SIGMA: reduced flickering in areas with a small blur radius (if TS is on) - SIGMA: exposed "stabilizationStrength" (TS pass is disabled if 0) - SIGMA: reduced umbra blurring when in wide penumbra - NRD INTEGRATION: bug fixes for debug logging - Updated deps - Updated docs --- External/MathLib | 2 +- Include/NRD.h | 6 +- Include/NRDDescs.h | 2 +- Include/NRDSettings.h | 12 +- Integration/NRDIntegration.h | 4 +- Integration/NRDIntegration.hpp | 11 +- README.md | 19 +-- Resources/Version.h | 4 +- Shaders/Include/Common.hlsli | 28 ++++ Shaders/Include/NRD.hlsli | 25 ++-- Shaders/Include/REBLUR_Common.hlsli | 29 +--- Shaders/Include/REBLUR_Config.hlsli | 2 +- Shaders/Include/SIGMA_Blur.hlsli | 136 ++++++++++-------- Shaders/Include/SIGMA_ClassifyTiles.hlsli | 7 +- Shaders/Include/SIGMA_Common.hlsli | 21 ++- Shaders/Include/SIGMA_Config.hlsli | 14 +- Shaders/Include/SIGMA_SplitScreen.hlsli | 6 +- .../Include/SIGMA_TemporalStabilization.hlsli | 13 +- Source/Denoisers/Sigma_Shadow.hpp | 21 +-- 
Source/Denoisers/Sigma_ShadowTranslucency.hpp | 21 +-- Source/InstanceImpl.cpp | 8 +- Source/Sigma.cpp | 27 ++-- UPDATE.md | 10 ++ 23 files changed, 239 insertions(+), 189 deletions(-) diff --git a/External/MathLib b/External/MathLib index 407ecd0..310266c 160000 --- a/External/MathLib +++ b/External/MathLib @@ -1 +1 @@ -Subproject commit 407ecd0d1892d12ee1ec98c3d46cbeed73b79a0d +Subproject commit 310266c8cec4dd5408485c4ae9ffe6567e5e0683 diff --git a/Include/NRD.h b/Include/NRD.h index 06e2aa8..3efed55 100644 --- a/Include/NRD.h +++ b/Include/NRD.h @@ -28,9 +28,9 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #include #define NRD_VERSION_MAJOR 4 -#define NRD_VERSION_MINOR 6 -#define NRD_VERSION_BUILD 1 -#define NRD_VERSION_DATE "25 March 2024" +#define NRD_VERSION_MINOR 7 +#define NRD_VERSION_BUILD 0 +#define NRD_VERSION_DATE "17 April 2024" #if defined(_MSC_VER) #define NRD_CALL __fastcall diff --git a/Include/NRDDescs.h b/Include/NRDDescs.h index 770301e..e38bef3 100644 --- a/Include/NRDDescs.h +++ b/Include/NRDDescs.h @@ -11,7 +11,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #pragma once #define NRD_DESCS_VERSION_MAJOR 4 -#define NRD_DESCS_VERSION_MINOR 6 +#define NRD_DESCS_VERSION_MINOR 7 static_assert(NRD_VERSION_MAJOR == NRD_DESCS_VERSION_MAJOR && NRD_VERSION_MINOR == NRD_DESCS_VERSION_MINOR, "Please, update all NRD SDK files"); diff --git a/Include/NRDSettings.h b/Include/NRDSettings.h index cd48012..f0f5a8a 100644 --- a/Include/NRDSettings.h +++ b/Include/NRDSettings.h @@ -11,7 +11,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#pragma once #define NRD_SETTINGS_VERSION_MAJOR 4 -#define NRD_SETTINGS_VERSION_MINOR 6 +#define NRD_SETTINGS_VERSION_MINOR 7 static_assert(NRD_VERSION_MAJOR == NRD_SETTINGS_VERSION_MAJOR && NRD_VERSION_MINOR == NRD_SETTINGS_VERSION_MINOR, "Please, update all NRD SDK files"); @@ -223,6 +223,7 @@ namespace nrd float responsiveAccumulationRoughnessThreshold = 0.0f; // (normalized %) - stabilizes output, more stabilization improves antilag (clean signals can use lower values) + // 0 - disables the stabilization pass float stabilizationStrength = 1.0f; // (normalized %) - represents maximum allowed deviation from local tangent plane @@ -260,11 +261,16 @@ namespace nrd struct SigmaSettings { + // Direction to the light source + // IMPORTANT: it is needed only for directional light sources (sun) + float lightDirection[3] = {0.0f, 0.0f, 0.0f}; + // (normalized %) - represents maximum allowed deviation from local tangent plane float planeDistanceSensitivity = 0.005f; - // [1; 3] - adds bias and stability if > 1 - float blurRadiusScale = 2.0f; + // (normalized %) - stabilizes output, more stabilization improves antilag (clean signals can use lower values) + // 0 - disables the stabilization pass and makes denoising spatial only (no history) + float stabilizationStrength = 1.0f; }; // RELAX diff --git a/Integration/NRDIntegration.h b/Integration/NRDIntegration.h index 24e5767..a782817 100644 --- a/Integration/NRDIntegration.h +++ b/Integration/NRDIntegration.h @@ -23,8 +23,8 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#include #define NRD_INTEGRATION_MAJOR 1 -#define NRD_INTEGRATION_MINOR 11 -#define NRD_INTEGRATION_DATE "19 March 2024" +#define NRD_INTEGRATION_MINOR 12 +#define NRD_INTEGRATION_DATE "17 April 2024" #define NRD_INTEGRATION 1 #define NRD_INTEGRATION_DEBUG_LOGGING 0 diff --git a/Integration/NRDIntegration.hpp b/Integration/NRDIntegration.hpp index 4ac4f2c..2018dc9 100644 --- a/Integration/NRDIntegration.hpp +++ b/Integration/NRDIntegration.hpp @@ -312,7 +312,7 @@ void NrdIntegration::CreateResources(uint16_t resourceWidth, uint16_t resourceHe m_TransientPoolSize += memoryDesc.size; #if( NRD_INTEGRATION_DEBUG_LOGGING == 1 ) - printf("%s %ux%u format=%u mips=%u\n", name, nrdTextureDesc.width, nrdTextureDesc.height, nrdTextureDesc.format, nrdTextureDesc.mipNum); + printf("%s format=%u downsampleFactor=%u\n", name, nrdTextureDesc.format, nrdTextureDesc.downsampleFactor); #endif } @@ -403,7 +403,7 @@ void NrdIntegration::NewFrame() NRD_INTEGRATION_ASSERT(m_Instance, "Uninitialized! Did you forget to call 'Initialize'?"); #if( NRD_INTEGRATION_DEBUG_LOGGING == 1 ) - printf("%s (frame %u) ==============================================================================\n\n", m_Name, frameIndex); + printf("%s (frame %u) ==============================================================================\n\n", m_Name, m_FrameIndex); #endif m_DescriptorPoolIndex = m_FrameIndex % m_BufferedFramesNum; @@ -616,12 +616,7 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor if( r.type == nrd::ResourceType::PERMANENT_POOL ) printf("P(%u) ", r.indexInPool); else if( r.type == nrd::ResourceType::TRANSIENT_POOL ) - { - if (r.mipNum != 1 || r.mipOffset != 0) - printf("T(%u)[%u:%u] ", r.indexInPool, r.mipOffset, r.mipNum); - else - printf("T(%u) ", r.indexInPool); - } + printf("T(%u) ", r.indexInPool); else { const char* s = nrd::GetResourceTypeString(r.type); diff --git a/README.md b/README.md index cf4dace..c7d10a3 100644 --- a/README.md +++ b/README.md @@ -1,4 
+1,4 @@ -# NVIDIA REAL-TIME DENOISERS v4.6.1 (NRD) +# NVIDIA REAL-TIME DENOISERS v4.7.0 (NRD) [![Build NRD SDK](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml/badge.svg)](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml) @@ -18,7 +18,8 @@ For quick starting see *[NRD sample](https://github.com/NVIDIAGameWorks/NRDSampl Performance on RTX 4080 @ 1440p (native resolution, default denoiser settings): - `REBLUR_DIFFUSE_SPECULAR` - 2.45 ms - `RELAX_DIFFUSE_SPECULAR` - 2.90 ms -- `SIGMA_SHADOW` - 0.30 ms +- `SIGMA_SHADOW` - 0.30 ms (0.24 ms if temporal stabilization is off) +- `SIGMA_SHADOW_TRANSLUCENCY` - 0.40 ms (0.30 ms if temporal stabilization is off) Supported signal types: - *RELAX*: @@ -589,7 +590,7 @@ Denoising is not a panacea or miracle. Denoising works best with ray tracing res float3 preintegratedBRDF = PreintegratedBRDF( Rf0, N, V, roughness ) Denoising( specularRadiance * BRDF ) → NRD( specularRadiance * BRDF / preintegratedBRDF ) * preintegratedBRDF -A good approximation for pre-integrated specular BRDF can be found *[here](https://github.com/NVIDIAGameWorks/Falcor/blob/056f7b7c73b69fa8140d211bbf683ddf297a2ae0/Source/Falcor/Rendering/Materials/Microfacet.slang#L213)*. +A good approximation for pre-integrated specular BRDF can be found *[here](https://github.com/NVIDIAGameWorks/MathLib/blob/407ecd0d1892d12ee1ec98c3d46cbeed73b79a0d/STL.hlsli#L2147)*. Pre-integrated specular BRDF can also be referenced as "specular albedo" or "environment BRDF". ## COMBINED DENOISING OF DIRECT AND INDIRECT LIGHTING @@ -710,7 +711,7 @@ Hair strands tangent vectors *can't* be used as "normals guide" for *NRD* due to **[NRD]** Hit distances should come from an importance sampling method. But if denoising of AO/SO is needed, AO/SO can come from cos-weighted (or VNDF) sampling in a tradeoff of IQ. -**[NRD]** Low discrepancy sampling (blue noise) helps to have more stable output in 0.5-1 rpp mode.
It's a must for REBLUR-based Ambient and Specular Occlusion denoisers and SIGMA. +**[NRD]** Low discrepancy sampling (blue noise) helps to get more stable output in 0.5-1 rpp mode. It's a must for REBLUR-based Ambient and Specular Occlusion denoisers and SIGMA. **[NRD]** It's recommended to set `CommonSettings::accumulationMode` to `RESET` for a single frame, if a history reset is needed. If history buffers are recreated or contain garbage, it's recommended to use `CLEAR_AND_RESET` for a single frame. `CLEAR_AND_RESET` is not free because clearing is done in a compute shader. Render target clears on the application side should be prioritized over this solution. @@ -742,13 +743,11 @@ maxAccumulatedFrameNum > maxFastAccumulatedFrameNum > historyFixFrameNum **[REBLUR]** *REBLUR* expects hit distances in a normalized form. To avoid mismatching, `REBLUR_FrontEnd_GetNormHitDist` must be used for normalization. Normalization parameters should be passed into *NRD* as `HitDistanceParameters` for internal hit distance denormalization. Some tweaking can be needed here, but in most cases default `HitDistanceParameters` works well. *REBLUR* outputs denoised normalized hit distance, which can be used by the application as ambient or specular occlusion (AO & SO) (see unpacking functions from `NRD.hlsli`). -**[REBLUR]** Intensity antilag parameters need to be carefully tuned. The defaults are good but `AntilagIntensitySettings::sensitivityToDarkness` needs to be tuned for a given HDR range. Initial integration should work with intensity antilag turned off. - -**[REBLUR]** Even if antilag is off, it's recommended to tune `AntilagIntensitySettings::sensitivityToDarkness`, because it is used for error estimation. +**[REBLUR/RELAX]** Antilag parameters need to be carefully tuned. Initial integration should be done with disabled antilag. **[RELAX]** *RELAX* works well with signals produced by *RTXDI* or very clean high RPP signals. The Sweet Home of *RELAX* is *RTXDI* sample. 
Please, consider getting familiar with this application. -**[SIGMA]** Using "blue" noise can help to avoid shadow shimmering, it works best if the pattern is static on the screen. Additionally, `blurRadiusScale` can be set to `2-4` to mitigate such problems in complicated cases. +**[SIGMA]** Using "blue" noise can help to avoid shadow shimmering. It works best if the pattern is static on the screen. **[SIGMA]** *SIGMA_TRANSLUCENT_SHADOW* can be used for shadow denoising from multiple light sources: @@ -798,3 +797,7 @@ Is this a biased solution? If spatial filtering is off - no, because we just reo - if shadows overlap, a separate pass is needed to analyze noisy input and classify pixels as *umbra* - *penumbra* (and optionally *empty space*). Raster shadow maps can be used for this if available - it is not recommended to mix 1 cd and 100000 cd lights, since FP32 texture will be needed for a weighted sum. In this case, it's better to process the sun and other bright light sources separately. + +**[SIGMA]** *SIGMA* can be used for multi-light shadow denoising if applied "per light". `SigmaSettings::stabilizationStrength` can be set to `0` to disable temporal history. 
It provides the following benefits: + - light count independent memory usage + - no need to manage history buffers for lights diff --git a/Resources/Version.h b/Resources/Version.h index 9987937..02ca3ba 100644 --- a/Resources/Version.h +++ b/Resources/Version.h @@ -22,7 +22,7 @@ Versioning rules: */ #define VERSION_MAJOR 4 -#define VERSION_MINOR 6 -#define VERSION_BUILD 1 +#define VERSION_MINOR 7 +#define VERSION_BUILD 0 #define VERSION_STRING STR(VERSION_MAJOR.VERSION_MINOR.VERSION_BUILD encoding=NRD_NORMAL_ENCODING.NRD_ROUGHNESS_ENCODING) diff --git a/Shaders/Include/Common.hlsli b/Shaders/Include/Common.hlsli index 504e1d6..176c9ea 100644 --- a/Shaders/Include/Common.hlsli +++ b/Shaders/Include/Common.hlsli @@ -129,6 +129,34 @@ Usage: printf(__VA_ARGS__) #endif +//================================================================================================================== +// KERNELS +//================================================================================================================== + +static const float3 g_Special6[ 6 ] = +{ + // https://www.desmos.com/calculator/e5mttzlg6v + float3( -0.50 * sqrt( 3.0 ) , -0.50 , 1.0 ), + float3( 0.00 , 1.00 , 1.0 ), + float3( 0.50 * sqrt( 3.0 ) , -0.50 , 1.0 ), + float3( 0.00 , -0.30 , 0.3 ), + float3( 0.15 * sqrt( 3.0 ) , 0.15 , 0.3 ), + float3( -0.15 * sqrt( 3.0 ) , 0.15 , 0.3 ), +}; + +static const float3 g_Special8[ 8 ] = +{ + // https://www.desmos.com/calculator/abaqyvswem + float3( -1.00 , 0.00 , 1.0 ), + float3( 0.00 , 1.00 , 1.0 ), + float3( 1.00 , 0.00 , 1.0 ), + float3( 0.00 , -1.00 , 1.0 ), + float3( -0.25 * sqrt( 2.0 ) , 0.25 * sqrt( 2.0 ) , 0.5 ), + float3( 0.25 * sqrt( 2.0 ) , 0.25 * sqrt( 2.0 ) , 0.5 ), + float3( 0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 ), + float3( -0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 ) +}; + //================================================================================================================== // SHARED FUNCTIONS
//================================================================================================================== diff --git a/Shaders/Include/NRD.hlsli b/Shaders/Include/NRD.hlsli index 787051a..9c37039 100644 --- a/Shaders/Include/NRD.hlsli +++ b/Shaders/Include/NRD.hlsli @@ -8,7 +8,7 @@ distribution of this software and related documentation without an express license agreement from NVIDIA CORPORATION is strictly prohibited. */ -// NRD v4.6 +// NRD v4.7 //================================================================================================================================= // INPUT PARAMETERS @@ -56,7 +56,7 @@ float distanceToOccluder: - distance to occluder, must follow the rules: - NoL <= 0 - 0 ( it's very important ) - NoL > 0 ( hit ) - hit distance - - NoL > 0 ( miss ) - NRD_FP16_MAX + - NoL > 0 ( miss ) - >= NRD_FP16_MAX float tanOfLightAngularRadius: - tan( lightAngularSize * 0.5 ) @@ -747,12 +747,12 @@ float2 SIGMA_FrontEnd_PackShadow( float viewZ, float distanceToOccluder, float t r.x = 0.0; r.y = _NRD_PackViewZ( viewZ ); - if( distanceToOccluder == NRD_FP16_MAX ) + if( distanceToOccluder >= NRD_FP16_MAX ) r.x = NRD_FP16_MAX; else if( distanceToOccluder != 0.0 ) { - float distanceToOccluderProj = distanceToOccluder * tanOfLightAngularRadius; - r.x = min( distanceToOccluderProj, 32768.0 ); + float penumbraRadius = distanceToOccluder * tanOfLightAngularRadius; + r.x = min( penumbraRadius, 32768.0 ); } return r; @@ -762,7 +762,7 @@ float2 SIGMA_FrontEnd_PackShadow( float viewZ, float distanceToOccluder, float t float2 SIGMA_FrontEnd_PackShadow( float viewZ, float distanceToOccluder, float tanOfLightAngularRadius, float3 translucency, out float4 out2 ) { // IN_SHADOW_TRANSLUCENCY - out2.x = float( distanceToOccluder == NRD_FP16_MAX ); + out2.x = float( distanceToOccluder >= NRD_FP16_MAX ); out2.yzw = saturate( translucency ); // IN_SHADOWDATA @@ -879,16 +879,11 @@ NRD_SG RELAX_BackEnd_UnpackSh( float4 sh0, float4 sh1 ) 
//================================================================================================================================= // OUT_SHADOW_TRANSLUCENCY => X -// SIGMA_SHADOW: -// float shadowData = SIGMA_BackEnd_UnpackShadow( shadowData ); -// shadow = shadowData; +// SIGMA_SHADOW / SIGMA_SHADOW_TRANSLUCENCY: +// float shadow = SIGMA_BackEnd_UnpackShadow( OUT_SHADOW_TRANSLUCENCY ); // SIGMA_SHADOW_TRANSLUCENCY: -// float4 shadowData = SIGMA_BackEnd_UnpackShadow( shadowData ); -// float3 finalShadowCommon = lerp( shadowData.yzw, 1.0, shadowData.x ); // or -// float3 finalShadowExotic = shadowData.yzw * shadowData.x; // or -// float3 finalShadowMoreExotic = shadowData.yzw; -// IMPORTANT: use "^ 3" to compensate over-blurring ( it really makes the result closer to the reference ) -#define SIGMA_BackEnd_UnpackShadow( color ) ( color * color * color ) +// float3 translucentShadow = SIGMA_BackEnd_UnpackShadow( OUT_SHADOW_TRANSLUCENCY ).yzw; +#define SIGMA_BackEnd_UnpackShadow( shadow ) ( shadow * shadow ) //================================================================================================================================= // BACK-END - HIGH QUALITY RESOLVE diff --git a/Shaders/Include/REBLUR_Common.hlsli b/Shaders/Include/REBLUR_Common.hlsli index a3b3321..cc0ecb0 100644 --- a/Shaders/Include/REBLUR_Common.hlsli +++ b/Shaders/Include/REBLUR_Common.hlsli @@ -14,31 +14,6 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#define REBLUR_BLUR 1 #define REBLUR_POST_BLUR 2 -// Kernels -static const float3 g_Special6[ 6 ] = -{ - // https://www.desmos.com/calculator/e5mttzlg6v - float3( -0.50 * sqrt( 3.0 ) , -0.50 , 1.0 ), - float3( 0.00 , 1.00 , 1.0 ), - float3( 0.50 * sqrt( 3.0 ) , -0.50 , 1.0 ), - float3( 0.00 , -0.30 , 0.3 ), - float3( 0.15 * sqrt( 3.0 ) , 0.15 , 0.3 ), - float3( -0.15 * sqrt( 3.0 ) , 0.15 , 0.3 ), -}; - -static const float3 g_Special8[ 8 ] = -{ - // https://www.desmos.com/calculator/abaqyvswem - float3( -1.00 , 0.00 , 1.0 ), - float3( 0.00 , 1.00 , 1.0 ), - float3( 1.00 , 0.00 , 1.0 ), - float3( 0.00 , -1.00 , 1.0 ), - float3( -0.25 * sqrt( 2.0 ) , 0.25 * sqrt( 2.0 ) , 0.5 ), - float3( 0.25 * sqrt( 2.0 ) , 0.25 * sqrt( 2.0 ) , 0.5 ), - float3( 0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 ), - float3( -0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 ) -}; - // Storage #define REBLUR_MAX_ACCUM_FRAME_NUM 63.0 @@ -360,7 +335,9 @@ float2x3 GetKernelBasis( float3 D, float3 N, float NoD, float roughness = 1.0, f B = cross( R, T ); float skewFactor = lerp( 0.5 + 0.5 * roughness, 1.0, NoD ); - T *= lerp( skewFactor, 1.0, anisoFade ); + skewFactor = lerp( skewFactor, 1.0, anisoFade ); + + T *= skewFactor; // TODO: B /= skewFactor? } return float2x3( T, B ); diff --git a/Shaders/Include/REBLUR_Config.hlsli b/Shaders/Include/REBLUR_Config.hlsli index bb48def..85c858a 100644 --- a/Shaders/Include/REBLUR_Config.hlsli +++ b/Shaders/Include/REBLUR_Config.hlsli @@ -68,7 +68,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#define REBLUR_HIT_DIST_MIN_WEIGHT( smc ) ( 0.1 * smc ) // was 0.1 #define REBLUR_MAX_PERCENT_OF_LOBE_VOLUME 0.75 -#define REBLUR_VIRTUAL_MOTION_PREV_PREV_WEIGHT_ITERATION_NUM 2 +#define REBLUR_VIRTUAL_MOTION_PREV_PREV_WEIGHT_ITERATION_NUM 1 #define REBLUR_COLOR_CLAMPING_SIGMA_SCALE 2.0 // using smaller values leads to bias if camera rotates slowly due to reprojection instabilities #define REBLUR_FIREFLY_SUPPRESSOR_MAX_RELATIVE_INTENSITY float2( 10.0, 1.1 ) #define REBLUR_FIREFLY_SUPPRESSOR_RADIUS_SCALE 0.1 diff --git a/Shaders/Include/SIGMA_Blur.hlsli b/Shaders/Include/SIGMA_Blur.hlsli index 7bfb39d..98ecfb4 100644 --- a/Shaders/Include/SIGMA_Blur.hlsli +++ b/Shaders/Include/SIGMA_Blur.hlsli @@ -24,11 +24,11 @@ void Preload( uint2 sharedPos, int2 globalPos ) #if( !defined SIGMA_FIRST_PASS || defined SIGMA_TRANSLUCENT ) s = gIn_Shadow_Translucency[ globalPos ]; #else - s = float( data.x == NRD_FP16_MAX ); + s = IsLit( data.x ); #endif #ifndef SIGMA_FIRST_PASS - s = UnpackShadowSpecial( s ); + s = SIGMA_BackEnd_UnpackShadow( s ); #endif s_Shadow_Translucency[ sharedPos.y ][ sharedPos.x ] = s; @@ -58,7 +58,8 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : // Copy history #ifdef SIGMA_FIRST_PASS - gOut_History[ pixelPos ] = gIn_History[ pixelPos ]; + if( gStabilizationStrength != 0 ) + gOut_History[ pixelPos ] = gIn_History[ pixelPos ]; #endif // Tile-based early out ( potentially ) @@ -70,19 +71,11 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : if( ( tileValue == 0.0 && NRD_USE_TILE_CHECK ) || centerHitDist == 0.0 ) { - gOut_Shadow_Translucency[ pixelPos ] = PackShadow( s_Shadow_Translucency[ smemPos.y ][ smemPos.x ] ); gOut_Hit_ViewZ[ pixelPos ] = float2( 0.0, viewZ * NRD_FP16_VIEWZ_SCALE ); - - return; - } - - // Reference - #if( SIGMA_REFERENCE == 1 ) gOut_Shadow_Translucency[ pixelPos ] = PackShadow( s_Shadow_Translucency[ smemPos.y ][ smemPos.x ] ); - gOut_Hit_ViewZ[ pixelPos ] = float2( 
centerHitDist * centerSignNoL, viewZ * NRD_FP16_VIEWZ_SCALE ); return; - #endif + } // Position float3 Xv = STL::Geometry::ReconstructViewPosition( pixelUv, gFrustum, viewZ, gOrthoMode ); @@ -92,8 +85,12 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float3 N = normalAndRoughness.xyz; float3 Nv = STL::Geometry::RotateVector( gWorldToView, N ); + // Parameters + float frustumSize = PixelRadiusToWorld( gUnproject, gOrthoMode, min( gRectSize.x, gRectSize.y ), viewZ ); // TODO: use GetFrustumSize + float2 geometryWeightParams = GetGeometryWeightParams( gPlaneDistSensitivity, frustumSize, Xv, Nv, 1.0 ); + // Estimate average distance to occluder - float sum = 0; + float2 sum = 0; float hitDist = 0; SIGMA_TYPE result = 0; @@ -104,9 +101,8 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : for( i = 0; i <= BORDER * 2; i++ ) { int2 pos = threadPos + int2( i, j ); - float2 data = s_Data[ pos.y ][ pos.x ]; - SIGMA_TYPE s = s_Shadow_Translucency[ pos.y ][ pos.x ]; + float2 data = s_Data[ pos.y ][ pos.x ]; float h = data.x; float signNoL = float( data.x != 0.0 ); float z = data.y; @@ -114,50 +110,67 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float w = 1.0; if( !( i == BORDER && j == BORDER ) ) { - w = GetBilateralWeight( z, viewZ ); - w *= saturate( 1.0 - abs( centerSignNoL - signNoL ) ); + float2 uv = pixelUv + float2( i - BORDER, j - BORDER ) * gRectSizeInv; + float3 Xvs = STL::Geometry::ReconstructViewPosition( uv, gFrustum, z, gOrthoMode ); + float NoX = dot( Nv, Xvs ); + + w = ComputeWeight( NoX, geometryWeightParams.x, geometryWeightParams.y ); + w *= GetGaussianWeight( length( float2( i - BORDER, j - BORDER ) / BORDER ) ); + w *= float( z < gDenoisingRange ); + w *= float( centerSignNoL == signNoL ); } - result += s * w; - hitDist += h * float( s.x != 1.0 ) * w; - sum += w; + SIGMA_TYPE s = s_Shadow_Translucency[ pos.y ][ pos.x ]; + s = Denanify( w, s ); + + 
float2 ww = w; + ww.y *= float( s.x != 1.0 ); // TODO: what if s.x == 1.0, but h < NRD_FP16_MAX? + ww.y *= 1.0 / ( 1.0 + h * SIGMA_PENUMBRA_WEIGHT_SCALE ); // prefer smaller penumbra + + result += s * ww.x; + hitDist += h * ww.y; + sum += ww; } } - float invSum = 1.0 / sum; - result *= invSum; - hitDist *= invSum; + result /= sum.x; + hitDist /= max( sum.y, NRD_EPS ); // yes, without patching + + float invHitDist = 1.0 / max( hitDist, NRD_EPS ); // Blur radius float unprojectZ = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ ); + float worldRadius = GetKernelRadiusInPixels( hitDist, unprojectZ ) * unprojectZ; + worldRadius *= tileValue; // helps to prevent blurring "inside" umbra + worldRadius /= SIGMA_SPATIAL_PASSES_NUM; - float innerShadowRadiusScale = lerp( 0.5, 1.0, result.x ); - float outerShadowRadiusScale = 1.0; // TODO: find a way to improve penumbra - float pixelRadius = innerShadowRadiusScale * outerShadowRadiusScale; - pixelRadius *= tileValue; - pixelRadius *= hitDist / unprojectZ; - pixelRadius *= gBlurRadiusScale; + // Tangent basis with anisotropy + float3x3 mWorldToLocal = STL::Geometry::GetBasis( Nv ); + float3 Tv = mWorldToLocal[ 0 ]; + float3 Bv = mWorldToLocal[ 1 ]; - float centerWeight = STL::Math::LinearStep( 0.9, 1.0, result.x ); - float penumbraFixWeight = lerp( saturate( pixelRadius / 1.5 ), 1.0, centerWeight ) * result.x; - pixelRadius += SIGMA_PENUMBRA_FIX_BLUR_RADIUS_ADDON * penumbraFixWeight; // TODO: improve + float3 t = cross( gLightDirectionView.xyz, Nv ); // TODO: add support for other light types to bring proper anisotropic filtering + if( length( t ) > 0.001 ) + { + Tv = normalize( t ); + Bv = cross( Tv, Nv ); - pixelRadius = min( pixelRadius, SIGMA_MAX_PIXEL_RADIUS ); + float cosa = abs( dot( Nv, gLightDirectionView.xyz ) ); + float skewFactor = lerp( 0.25, 1.0, cosa ); - // Tangent basis - float worldRadius = pixelRadius * unprojectZ; - float3x3 mWorldToLocal = STL::Geometry::GetBasis( Nv ); - float3 Tv = mWorldToLocal[ 
0 ] * worldRadius; - float3 Bv = mWorldToLocal[ 1 ] * worldRadius; + //Tv *= skewFactor; // TODO: needed? + Bv /= skewFactor; + } + + Tv *= worldRadius; + Bv *= worldRadius; // Random rotation float4 rotator = GetBlurKernelRotation( SIGMA_ROTATOR_MODE, pixelPos, gRotator, gFrameIndex ); // Denoising - sum = 1.0; - - float frustumSize = PixelRadiusToWorld( gUnproject, gOrthoMode, min( gRectSize.x, gRectSize.y ), viewZ ); - float2 geometryWeightParams = GetGeometryWeightParams( gPlaneDistSensitivity, frustumSize, Xv, Nv, 1.0 ); + sum.x = 1.0; + sum.y = float( sum.y != 0.0 ); [unroll] for( uint n = 0; n < SIGMA_POISSON_SAMPLE_NUM; n++ ) @@ -184,40 +197,45 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float w = IsInScreenNearest( uv ); w *= GetGaussianWeight( offset.z ); - w *= float( z < gDenoisingRange ); w *= ComputeWeight( NoX, geometryWeightParams.x, geometryWeightParams.y ); - w *= saturate( 1.0 - abs( centerSignNoL - signNoL ) ); + w *= float( z < gDenoisingRange ); + w *= float( centerSignNoL == signNoL ); + + // Avoid umbra leaking inside wide penumbra + float t = saturate( h * invHitDist ); + w *= STL::Math::LinearStep( 0.0, 0.1, t ); + // Fetch shadow SIGMA_TYPE s; #if( !defined SIGMA_FIRST_PASS || defined SIGMA_TRANSLUCENT ) s = gIn_Shadow_Translucency.SampleLevel( gNearestClamp, uvScaled, 0 ); #else - s = float( h == NRD_FP16_MAX ); + s = IsLit( h ); #endif s = Denanify( w, s ); #ifndef SIGMA_FIRST_PASS - s = UnpackShadowSpecial( s ); + s = SIGMA_BackEnd_UnpackShadow( s ); #endif - // Weight for outer shadow ( to avoid blurring of ~umbra ) - w *= lerp( 1.0, s.x, centerWeight ); - // Accumulate - sum += w; + float2 ww = w; + ww.y *= float( s.x != 1.0 ); // TODO: what if s.x == 1.0, but h < NRD_FP16_MAX? 
+ ww.y *= 1.0 / ( 1.0 + h * SIGMA_PENUMBRA_WEIGHT_SCALE ); // prefer smaller penumbra - result += s * w; - hitDist += h * float( s.x != 1.0 ) * w; + result += s * ww.x; + hitDist += h * ww.y; + sum += ww; } - invSum = 1.0 / sum; - result *= invSum; - hitDist *= invSum; - - hitDist *= tileValue; - hitDist *= centerSignNoL; + result /= sum.x; + hitDist = sum.y == 0.0 ? centerHitDist : hitDist / sum.y; // Output + #ifndef SIGMA_FIRST_PASS + if( gStabilizationStrength != 0 ) + #endif + gOut_Hit_ViewZ[ pixelPos ] = float2( hitDist, viewZ * NRD_FP16_VIEWZ_SCALE ); + gOut_Shadow_Translucency[ pixelPos ] = PackShadow( result ); - gOut_Hit_ViewZ[ pixelPos ] = float2( hitDist, viewZ * NRD_FP16_VIEWZ_SCALE ); } diff --git a/Shaders/Include/SIGMA_ClassifyTiles.hlsli b/Shaders/Include/SIGMA_ClassifyTiles.hlsli index 062656e..8fcd422 100644 --- a/Shaders/Include/SIGMA_ClassifyTiles.hlsli +++ b/Shaders/Include/SIGMA_ClassifyTiles.hlsli @@ -40,7 +40,7 @@ NRD_EXPORT void NRD_CS_MAIN( uint2 threadPos : SV_GroupThreadId, uint2 tilePos : bool isInf = viewZ > gDenoisingRange; bool isShadow = data.x == 0; - bool isLit = data.x == NRD_FP16_MAX; + bool isLit = IsLit( data.x ); bool isOpaque = true; #ifdef SIGMA_TRANSLUCENT @@ -52,10 +52,9 @@ NRD_EXPORT void NRD_CS_MAIN( uint2 threadPos : SV_GroupThreadId, uint2 tilePos : mask += ( ( ( !isLit && isOpaque ) || isInf || isShadow ) ? 1 : 0 ) << 9; mask += ( isInf ? 1 : 0 ) << 18; - float worldRadius = ( isLit || isInf ) ? 0 : ( data.x * gBlurRadiusScale ); + float hitDist = ( isLit || isInf ) ? 
0 : data.x; float unprojectZ = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ ); - float pixelRadius = worldRadius * STL::Math::PositiveRcp( unprojectZ ); - pixelRadius = min( pixelRadius, SIGMA_MAX_PIXEL_RADIUS ); + float pixelRadius = GetKernelRadiusInPixels( hitDist, unprojectZ ); maxRadius = max( pixelRadius, maxRadius ); } diff --git a/Shaders/Include/SIGMA_Common.hlsli b/Shaders/Include/SIGMA_Common.hlsli index 321547e..b779b06 100644 --- a/Shaders/Include/SIGMA_Common.hlsli +++ b/Shaders/Include/SIGMA_Common.hlsli @@ -10,17 +10,16 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. // Misc -#define PackShadow( s ) STL::Math::Sqrt01( s ) -#define UnpackShadow( s ) ( s * s ) - -// TODO: shadow unpacking is less trivial -// 2.0 - closer to reference ( dictated by encoding ) -// 2.0 - s.x - looks better -#if 0 - #define UnpackShadowSpecial( s ) STL::Math::Pow01( s, 2.0 - s.x * ( 1 - SIGMA_REFERENCE ) ) -#else - #define UnpackShadowSpecial( s ) UnpackShadow( s ) -#endif +#define PackShadow( s ) STL::Math::Sqrt01( s ) // must match "SIGMA_BackEnd_UnpackShadow" +#define IsLit( h ) ( h >= NRD_FP16_MAX ) + +float GetKernelRadiusInPixels( float hitDist, float unprojectZ ) +{ + float pixelRadius = hitDist / unprojectZ; + pixelRadius = min( pixelRadius, SIGMA_MAX_PIXEL_RADIUS ); + + return pixelRadius; +} // TODO: move code below to STL.hlsl diff --git a/Shaders/Include/SIGMA_Config.hlsli b/Shaders/Include/SIGMA_Config.hlsli index 5be9710..f62ef56 100644 --- a/Shaders/Include/SIGMA_Config.hlsli +++ b/Shaders/Include/SIGMA_Config.hlsli @@ -16,17 +16,17 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #define SIGMA_5X5_BLUR_RADIUS_ESTIMATION_KERNEL 1 // helps to improve stability, but adds 10% of overhead // Switches ( default 0 ) -#define SIGMA_REFERENCE 0 // works better with 16-bit precision #define SIGMA_SHOW_TILES 0 +#define SIGMA_SHOW_PENUMBRA_SIZE 0 // Settings -#define SIGMA_ROTATOR_MODE NRD_PIXEL // NRD_FRAME? 
+#define SIGMA_ROTATOR_MODE NRD_FRAME #define SIGMA_POISSON_SAMPLE_NUM 8 -#define SIGMA_POISSON_SAMPLES g_Poisson8 +#define SIGMA_POISSON_SAMPLES g_Special8 #define SIGMA_MAX_PIXEL_RADIUS 32.0 -#define SIGMA_MIN_HIT_DISTANCE_OUTPUT 0.0001 -#define SIGMA_PENUMBRA_FIX_BLUR_RADIUS_ADDON 5.0 +#define SIGMA_PENUMBRA_WEIGHT_SCALE 10.0 #define SIGMA_MAX_SIGMA_SCALE 3.0 +#define SIGMA_SPATIAL_PASSES_NUM 2 #define SIGMA_TS_MOTION_MAX_REUSE 0.11 // Data type @@ -41,6 +41,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. NRD_CONSTANT( float4x4, gWorldToView ) \ NRD_CONSTANT( float4x4, gViewToClip ) \ NRD_CONSTANT( float4x4, gWorldToClipPrev ) \ + NRD_CONSTANT( float4, gLightDirectionView ) \ NRD_CONSTANT( float4, gFrustum ) \ NRD_CONSTANT( float4, gMvScale ) \ NRD_CONSTANT( float2, gResourceSizeInv ) \ @@ -58,8 +59,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. NRD_CONSTANT( float, gUnproject ) \ NRD_CONSTANT( float, gDenoisingRange ) \ NRD_CONSTANT( float, gPlaneDistSensitivity ) \ - NRD_CONSTANT( float, gBlurRadiusScale ) \ - NRD_CONSTANT( float, gContinueAccumulation ) \ + NRD_CONSTANT( float, gStabilizationStrength ) \ NRD_CONSTANT( float, gDebug ) \ NRD_CONSTANT( float, gSplitScreen ) \ NRD_CONSTANT( uint, gFrameIndex ) diff --git a/Shaders/Include/SIGMA_SplitScreen.hlsli b/Shaders/Include/SIGMA_SplitScreen.hlsli index ad75497..7f41e45 100644 --- a/Shaders/Include/SIGMA_SplitScreen.hlsli +++ b/Shaders/Include/SIGMA_SplitScreen.hlsli @@ -22,7 +22,11 @@ NRD_EXPORT void NRD_CS_MAIN( int2 pixelPos : SV_DispatchThreadId) #ifdef SIGMA_TRANSLUCENT s = gIn_Shadow_Translucency[ pixelPos ]; #else - s = float( data.x == NRD_FP16_MAX ); + s = IsLit( data.x ); + #endif + + #if( SIGMA_SHOW_PENUMBRA_SIZE == 1 ) + s.x = PackShadow( data.x ); #endif gOut_Shadow_Translucency[ pixelPos ] = s * float( viewZ < gDenoisingRange ); diff --git a/Shaders/Include/SIGMA_TemporalStabilization.hlsli b/Shaders/Include/SIGMA_TemporalStabilization.hlsli index 
582ed11..3fbb865 100644 --- a/Shaders/Include/SIGMA_TemporalStabilization.hlsli +++ b/Shaders/Include/SIGMA_TemporalStabilization.hlsli @@ -21,7 +21,7 @@ void Preload( uint2 sharedPos, int2 globalPos ) s_Data[ sharedPos.y ][ sharedPos.x ] = data; SIGMA_TYPE s = gIn_Shadow_Translucency[ globalPos ]; - s = UnpackShadow( s ); + s = SIGMA_BackEnd_UnpackShadow( s ); s_Shadow_Translucency[ sharedPos.y ][ sharedPos.x ] = s; } @@ -120,7 +120,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : BicubicFilterNoCorners( saturate( pixelUvPrev ) * gRectSizePrev, gResourceSizeInvPrev, SIGMA_USE_CATROM, gIn_History, history ); history = max( history, 0.0 ); - history = UnpackShadow( history ); + history = SIGMA_BackEnd_UnpackShadow( history ); // Clamp history float2 a = m1.xx; @@ -148,16 +148,15 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : // History weight float isInScreen = IsInScreenNearest( pixelUvPrev ); float motionLength = length( pixelUvPrev - pixelUv ); - float2 historyWeight = 0.93 * lerp( 1.0, 0.7, ratioNorm ); // use FPS-dependent value, like 0.25 * FPS + float2 historyWeight = 0.93 * lerp( 1.0, 0.7, ratioNorm ); historyWeight = lerp( historyWeight, 0.1, saturate( motionLength / SIGMA_TS_MOTION_MAX_REUSE ) ); historyWeight *= isInScreen; - historyWeight *= gContinueAccumulation; + historyWeight *= gStabilizationStrength; // Reduce history in regions with hard shadows - float worldRadius = centerHitDist * gBlurRadiusScale; float unprojectZ = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ ); - float pixelRadius = worldRadius * STL::Math::PositiveRcp( unprojectZ ); - historyWeight *= STL::Math::LinearStep( 0.0, 3.0, pixelRadius ); + float pixelRadius = GetKernelRadiusInPixels( centerHitDist, unprojectZ ); + historyWeight *= STL::Math::LinearStep( 0.0, 0.5, pixelRadius ); // Combine with current frame SIGMA_TYPE result; diff --git a/Source/Denoisers/Sigma_Shadow.hpp 
b/Source/Denoisers/Sigma_Shadow.hpp index 14f5b7a..08e5ca0 100644 --- a/Source/Denoisers/Sigma_Shadow.hpp +++ b/Source/Denoisers/Sigma_Shadow.hpp @@ -66,17 +66,22 @@ void nrd::InstanceImpl::Add_SigmaShadow(DenoiserData& denoiserData) AddDispatch( SIGMA_Shadow_Blur, SIGMA_Blur, USE_MAX_DIMS ); } - PushPass("Post-blur"); + for (int i = 0; i < SIGMA_POST_BLUR_PERMUTATION_NUM; i++) { - PushInput( AsUint(ResourceType::IN_NORMAL_ROUGHNESS) ); - PushInput( AsUint(Transient::DATA_1) ); - PushInput( AsUint(Transient::SMOOTHED_TILES) ); - PushInput( AsUint(Transient::TEMP_1) ); + bool isStabilizationEnabled = ( ( ( i >> 0 ) & 0x1 ) != 0 ); + + PushPass("Post-blur"); + { + PushInput( AsUint(ResourceType::IN_NORMAL_ROUGHNESS) ); + PushInput( AsUint(Transient::DATA_1) ); + PushInput( AsUint(Transient::SMOOTHED_TILES) ); + PushInput( AsUint(Transient::TEMP_1) ); - PushOutput( AsUint(Transient::DATA_2) ); - PushOutput( AsUint(Transient::TEMP_2) ); + PushOutput( AsUint(Transient::DATA_2) ); + PushOutput( isStabilizationEnabled ? 
AsUint(Transient::TEMP_2) : AsUint(ResourceType::OUT_SHADOW_TRANSLUCENCY) ); - AddDispatch( SIGMA_Shadow_PostBlur, SIGMA_Blur, 1 ); + AddDispatch( SIGMA_Shadow_PostBlur, SIGMA_Blur, 1 ); + } } PushPass("Temporal stabilization"); diff --git a/Source/Denoisers/Sigma_ShadowTranslucency.hpp b/Source/Denoisers/Sigma_ShadowTranslucency.hpp index 5cbe894..4cd7c9f 100644 --- a/Source/Denoisers/Sigma_ShadowTranslucency.hpp +++ b/Source/Denoisers/Sigma_ShadowTranslucency.hpp @@ -68,17 +68,22 @@ void nrd::InstanceImpl::Add_SigmaShadowTranslucency(nrd::DenoiserData& denoiserD AddDispatch( SIGMA_ShadowTranslucency_Blur, SIGMA_Blur, USE_MAX_DIMS ); } - PushPass("Post-blur"); + for (int i = 0; i < SIGMA_POST_BLUR_PERMUTATION_NUM; i++) { - PushInput( AsUint(ResourceType::IN_NORMAL_ROUGHNESS) ); - PushInput( AsUint(Transient::DATA_1) ); - PushInput( AsUint(Transient::SMOOTHED_TILES) ); - PushInput( AsUint(Transient::TEMP_1) ); + bool isStabilizationEnabled = ( ( ( i >> 0 ) & 0x1 ) != 0 ); + + PushPass("Post-blur"); + { + PushInput( AsUint(ResourceType::IN_NORMAL_ROUGHNESS) ); + PushInput( AsUint(Transient::DATA_1) ); + PushInput( AsUint(Transient::SMOOTHED_TILES) ); + PushInput( AsUint(Transient::TEMP_1) ); - PushOutput( AsUint(Transient::DATA_2) ); - PushOutput( AsUint(Transient::TEMP_2) ); + PushOutput( AsUint(Transient::DATA_2) ); + PushOutput( isStabilizationEnabled ? 
AsUint(Transient::TEMP_2) : AsUint(ResourceType::OUT_SHADOW_TRANSLUCENCY) ); - AddDispatch( SIGMA_ShadowTranslucency_PostBlur, SIGMA_Blur, 1 ); + AddDispatch( SIGMA_ShadowTranslucency_PostBlur, SIGMA_Blur, 1 ); + } } PushPass("Temporal stabilization"); diff --git a/Source/InstanceImpl.cpp b/Source/InstanceImpl.cpp index 58d6943..eeedd45 100644 --- a/Source/InstanceImpl.cpp +++ b/Source/InstanceImpl.cpp @@ -286,21 +286,19 @@ nrd::Result nrd::InstanceImpl::SetCommonSettings(const CommonSettings& commonSet memcpy(&m_CommonSettings, &commonSettings, sizeof(commonSettings)); // Rotators - float4 rndScale = float4(1.0f) + Rand::sf4(&m_FastRandState) * 0.25f; float4 rndAngle = Rand::uf4(&m_FastRandState) * DegToRad(360.0f); - rndAngle.w = DegToRad( 120.0f * float(m_CommonSettings.frameIndex % 3) ); float ca = Cos( rndAngle.x ); float sa = Sin( rndAngle.x ); - m_Rotator_PrePass = float4( ca, sa, -sa, ca ) * rndScale.x; + m_Rotator_PrePass = float4( ca, sa, -sa, ca ); ca = Cos( rndAngle.y ); sa = Sin( rndAngle.y ); - m_Rotator_Blur = float4( ca, sa, -sa, ca ) * rndScale.y; + m_Rotator_Blur = float4( ca, sa, -sa, ca ); ca = Cos( rndAngle.z ); sa = Sin( rndAngle.z ); - m_Rotator_PostBlur = float4( ca, sa, -sa, ca ) * rndScale.z; + m_Rotator_PostBlur = float4( ca, sa, -sa, ca ); // Main matrices m_ViewToClip = float4x4 diff --git a/Source/Sigma.cpp b/Source/Sigma.cpp index a677fd0..d7b2e40 100644 --- a/Source/Sigma.cpp +++ b/Source/Sigma.cpp @@ -17,16 +17,20 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#include "../Shaders/Resources/SIGMA_TemporalStabilization.resources.hlsli" #include "../Shaders/Resources/SIGMA_SplitScreen.resources.hlsli" +// Permutations +#define SIGMA_POST_BLUR_PERMUTATION_NUM 2 +#define SIGMA_NO_PERMUTATIONS 1 + void nrd::InstanceImpl::Update_SigmaShadow(const DenoiserData& denoiserData) { enum class Dispatch { CLASSIFY_TILES, - SMOOTH_TILES, - BLUR, - POST_BLUR, - TEMPORAL_STABILIZATION, - SPLIT_SCREEN, + SMOOTH_TILES = CLASSIFY_TILES + SIGMA_NO_PERMUTATIONS, + BLUR = SMOOTH_TILES + SIGMA_NO_PERMUTATIONS, + POST_BLUR = BLUR + SIGMA_NO_PERMUTATIONS, + TEMPORAL_STABILIZATION = POST_BLUR + SIGMA_POST_BLUR_PERMUTATION_NUM, + SPLIT_SCREEN = TEMPORAL_STABILIZATION + SIGMA_NO_PERMUTATIONS, }; const SigmaSettings& settings = denoiserData.settings.sigma; @@ -57,12 +61,15 @@ void nrd::InstanceImpl::Update_SigmaShadow(const DenoiserData& denoiserData) } { // POST_BLUR - SIGMA_BlurConstants* consts = (SIGMA_BlurConstants*)PushDispatch(denoiserData, AsUint(Dispatch::POST_BLUR)); + uint32_t passIndex = AsUint(Dispatch::POST_BLUR) + (settings.stabilizationStrength != 0.0f ? 
1 : 0); + SIGMA_BlurConstants* consts = (SIGMA_BlurConstants*)PushDispatch(denoiserData, passIndex); AddSharedConstants_Sigma(settings, consts); consts->gRotator = m_Rotator_PostBlur; // TODO: push constant } - { // TEMPORAL_STABILIZATION + // TEMPORAL_STABILIZATION + if (settings.stabilizationStrength != 0.0f) + { void* consts = PushDispatch(denoiserData, AsUint(Dispatch::TEMPORAL_STABILIZATION)); AddSharedConstants_Sigma(settings, consts); } @@ -88,10 +95,13 @@ void nrd::InstanceImpl::AddSharedConstants_Sigma(const SigmaSettings& settings, uint16_t tilesW = DivideUp(rectW, 16); uint16_t tilesH = DivideUp(rectH, 16); + float3 lightDirectionView = Rotate(m_WorldToView, float3(settings.lightDirection[0], settings.lightDirection[1], settings.lightDirection[2])); + SharedConstants* consts = (SharedConstants*)data; consts->gWorldToView = m_WorldToView; consts->gViewToClip = m_ViewToClip; consts->gWorldToClipPrev = m_WorldToClipPrev; + consts->gLightDirectionView = float4(lightDirectionView.x, lightDirectionView.y, lightDirectionView.z, 0.0f); consts->gFrustum = m_Frustum; consts->gMvScale = float4(m_CommonSettings.motionVectorScale[0], m_CommonSettings.motionVectorScale[1], m_CommonSettings.motionVectorScale[2], m_CommonSettings.isMotionVectorInWorldSpace ? 1.0f : 0.0f); consts->gResourceSizeInv = float2(1.0f / float(resourceW), 1.0f / float(resourceH)); @@ -109,8 +119,7 @@ void nrd::InstanceImpl::AddSharedConstants_Sigma(const SigmaSettings& settings, consts->gUnproject = unproject; consts->gDenoisingRange = m_CommonSettings.denoisingRange; consts->gPlaneDistSensitivity = settings.planeDistanceSensitivity; - consts->gBlurRadiusScale = settings.blurRadiusScale; - consts->gContinueAccumulation = m_CommonSettings.accumulationMode != AccumulationMode::CONTINUE ? 0.0f : 1.0f; + consts->gStabilizationStrength = m_CommonSettings.accumulationMode == AccumulationMode::CONTINUE ? 
settings.stabilizationStrength : 0.0f; consts->gDebug = m_CommonSettings.debug; consts->gSplitScreen = m_CommonSettings.splitScreen; consts->gFrameIndex = m_CommonSettings.frameIndex; diff --git a/UPDATE.md b/UPDATE.md index 2553c96..36e38d7 100644 --- a/UPDATE.md +++ b/UPDATE.md @@ -224,3 +224,13 @@ A single NRD instance can now include any combination of denoisers, including re - *REBLUR*: - `blurRadius` renamed to `maxBlurRadius` - exposed `minBlurRadius` with the default value matching older versions + +## To v4.7 + +- *SIGMA*: + - removed `blurRadiusScale` + - exposed `lightDirection`, which is needed only for directional light sources + - exposed `stabilizationStrength` + - clarified usage: + - `float shadow = SIGMA_BackEnd_UnpackShadow( OUT_SHADOW_TRANSLUCENCY );` + - `float3 translucentShadow = SIGMA_BackEnd_UnpackShadow( OUT_SHADOW_TRANSLUCENCY ).yzw;`