From f20158d5a35e0ceefde3564dbfa1108c460e94a3 Mon Sep 17 00:00:00 2001
From: dzhdan <dzhdan@nvidia.com>
Date: Mon, 15 Apr 2024 17:01:50 +0800
Subject: [PATCH] v4.7.0:

HIGHLIGHTS:

- REBLUR: minor performance optimization
- SIGMA: numerous improvements and bug fixes
- SIGMA: temporal stabilization pass can be disabled, which makes SIGMA more useful for per-light shadow denoising when there are many lights

DETAILS:

- REBLUR: number of steps in "prev-prev" test reduced to 1 (was 2)
- SIGMA: taken kernel from REBLUR
- SIGMA: per pixel rotations replaced with per frame rotations
- SIGMA: tuned random rotators (affects REBLUR too)
- SIGMA: improved "umbra in wide penumbra" behavior
- SIGMA: clarified usage of "SIGMA_BackEnd_UnpackShadow"
- SIGMA: reduced bias
- SIGMA: removed "SigmaSettings::blurRadiusScale"
- SIGMA: exposed "SigmaSettings::lightDirection"
- SIGMA: fixed mismatched behavior between opaque and translucent shadows
- SIGMA: anisotropic filtering support (currently only for directional light sources)
- SIGMA: reduced flickering in areas with a small blur radius (if TS is on)
- SIGMA: exposed "stabilizationStrength" (TS pass is disabled if 0)
- SIGMA: reduced umbra blurring when in wide penumbra
- NRD INTEGRATION: bug fixes for debug logging
- Updated deps
- Updated docs
---
 External/MathLib                              |   2 +-
 Include/NRD.h                                 |   6 +-
 Include/NRDDescs.h                            |   2 +-
 Include/NRDSettings.h                         |  12 +-
 Integration/NRDIntegration.h                  |   4 +-
 Integration/NRDIntegration.hpp                |  11 +-
 README.md                                     |  19 +--
 Resources/Version.h                           |   4 +-
 Shaders/Include/Common.hlsli                  |  28 ++++
 Shaders/Include/NRD.hlsli                     |  25 ++--
 Shaders/Include/REBLUR_Common.hlsli           |  29 +---
 Shaders/Include/REBLUR_Config.hlsli           |   2 +-
 Shaders/Include/SIGMA_Blur.hlsli              | 136 ++++++++++--------
 Shaders/Include/SIGMA_ClassifyTiles.hlsli     |   7 +-
 Shaders/Include/SIGMA_Common.hlsli            |  21 ++-
 Shaders/Include/SIGMA_Config.hlsli            |  14 +-
 Shaders/Include/SIGMA_SplitScreen.hlsli       |   6 +-
 .../Include/SIGMA_TemporalStabilization.hlsli |  13 +-
 Source/Denoisers/Sigma_Shadow.hpp             |  21 +--
 Source/Denoisers/Sigma_ShadowTranslucency.hpp |  21 +--
 Source/InstanceImpl.cpp                       |   8 +-
 Source/Sigma.cpp                              |  27 ++--
 UPDATE.md                                     |  10 ++
 23 files changed, 239 insertions(+), 189 deletions(-)
diff --git a/External/MathLib b/External/MathLib
index 407ecd0..310266c 160000
--- a/External/MathLib
+++ b/External/MathLib
@@ -1 +1 @@
-Subproject commit 407ecd0d1892d12ee1ec98c3d46cbeed73b79a0d
+Subproject commit 310266c8cec4dd5408485c4ae9ffe6567e5e0683
diff --git a/Include/NRD.h b/Include/NRD.h
index 06e2aa8..3efed55 100644
--- a/Include/NRD.h
+++ b/Include/NRD.h
@@ -28,9 +28,9 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 #include <cstddef>
 
 #define NRD_VERSION_MAJOR 4
-#define NRD_VERSION_MINOR 6
-#define NRD_VERSION_BUILD 1
-#define NRD_VERSION_DATE "25 March 2024"
+#define NRD_VERSION_MINOR 7
+#define NRD_VERSION_BUILD 0
+#define NRD_VERSION_DATE "17 April 2024"
 
 #if defined(_MSC_VER)
     #define NRD_CALL __fastcall
diff --git a/Include/NRDDescs.h b/Include/NRDDescs.h
index 770301e..e38bef3 100644
--- a/Include/NRDDescs.h
+++ b/Include/NRDDescs.h
@@ -11,7 +11,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 #pragma once
 
 #define NRD_DESCS_VERSION_MAJOR 4
-#define NRD_DESCS_VERSION_MINOR 6
+#define NRD_DESCS_VERSION_MINOR 7
 
 static_assert(NRD_VERSION_MAJOR == NRD_DESCS_VERSION_MAJOR && NRD_VERSION_MINOR == NRD_DESCS_VERSION_MINOR, "Please, update all NRD SDK files");
 
diff --git a/Include/NRDSettings.h b/Include/NRDSettings.h
index cd48012..f0f5a8a 100644
--- a/Include/NRDSettings.h
+++ b/Include/NRDSettings.h
@@ -11,7 +11,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 #pragma once
 
 #define NRD_SETTINGS_VERSION_MAJOR 4
-#define NRD_SETTINGS_VERSION_MINOR 6
+#define NRD_SETTINGS_VERSION_MINOR 7
 
 static_assert(NRD_VERSION_MAJOR == NRD_SETTINGS_VERSION_MAJOR && NRD_VERSION_MINOR == NRD_SETTINGS_VERSION_MINOR, "Please, update all NRD SDK files");
 
@@ -223,6 +223,7 @@ namespace nrd
         float responsiveAccumulationRoughnessThreshold = 0.0f;
 
         // (normalized %) - stabilizes output, more stabilization improves antilag (clean signals can use lower values)
+        // 0 - disables the stabilization pass
         float stabilizationStrength = 1.0f;
 
         // (normalized %) - represents maximum allowed deviation from local tangent plane
@@ -260,11 +261,16 @@ namespace nrd
 
     struct SigmaSettings
     {
+        // Direction to the light source
+        // IMPORTANT: it is needed only for directional light sources (sun)
+        float lightDirection[3] = {0.0f, 0.0f, 0.0f};
+
         // (normalized %) - represents maximum allowed deviation from local tangent plane
         float planeDistanceSensitivity = 0.005f;
 
-        // [1; 3] - adds bias and stability if > 1
-        float blurRadiusScale = 2.0f;
+        // (normalized %) - stabilizes output, more stabilization improves antilag (clean signals can use lower values)
+        // 0 - disables the stabilization pass and makes denoising spatial only (no history)
+        float stabilizationStrength = 1.0f;
     };
 
     // RELAX
diff --git a/Integration/NRDIntegration.h b/Integration/NRDIntegration.h
index 24e5767..a782817 100644
--- a/Integration/NRDIntegration.h
+++ b/Integration/NRDIntegration.h
@@ -23,8 +23,8 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 #include <map>
 
 #define NRD_INTEGRATION_MAJOR 1
-#define NRD_INTEGRATION_MINOR 11
-#define NRD_INTEGRATION_DATE "19 March 2024"
+#define NRD_INTEGRATION_MINOR 12
+#define NRD_INTEGRATION_DATE "17 April 2024"
 #define NRD_INTEGRATION 1
 
 #define NRD_INTEGRATION_DEBUG_LOGGING 0
diff --git a/Integration/NRDIntegration.hpp b/Integration/NRDIntegration.hpp
index 4ac4f2c..2018dc9 100644
--- a/Integration/NRDIntegration.hpp
+++ b/Integration/NRDIntegration.hpp
@@ -312,7 +312,7 @@ void NrdIntegration::CreateResources(uint16_t resourceWidth, uint16_t resourceHe
             m_TransientPoolSize += memoryDesc.size;
 
     #if( NRD_INTEGRATION_DEBUG_LOGGING == 1 )
-        printf("%s %ux%u format=%u mips=%u\n", name, nrdTextureDesc.width, nrdTextureDesc.height, nrdTextureDesc.format, nrdTextureDesc.mipNum);
+        printf("%s format=%u downsampleFactor=%u\n", name, nrdTextureDesc.format, nrdTextureDesc.downsampleFactor);
     #endif
     }
 
@@ -403,7 +403,7 @@ void NrdIntegration::NewFrame()
     NRD_INTEGRATION_ASSERT(m_Instance, "Uninitialized! Did you forget to call 'Initialize'?");
 
 #if( NRD_INTEGRATION_DEBUG_LOGGING == 1 )
-        printf("%s (frame %u) ==============================================================================\n\n", m_Name, frameIndex);
+        printf("%s (frame %u) ==============================================================================\n\n", m_Name, m_FrameIndex);
     #endif
 
     m_DescriptorPoolIndex = m_FrameIndex % m_BufferedFramesNum;
@@ -616,12 +616,7 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor
             if( r.type == nrd::ResourceType::PERMANENT_POOL )
                 printf("P(%u) ", r.indexInPool);
             else if( r.type == nrd::ResourceType::TRANSIENT_POOL )
-            {
-                if (r.mipNum != 1 || r.mipOffset != 0)
-                    printf("T(%u)[%u:%u] ", r.indexInPool, r.mipOffset, r.mipNum);
-                else
-                    printf("T(%u) ", r.indexInPool);
-            }
+                printf("T(%u) ", r.indexInPool);
             else
             {
                 const char* s = nrd::GetResourceTypeString(r.type);
diff --git a/README.md b/README.md
index cf4dace..c7d10a3 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# NVIDIA REAL-TIME DENOISERS v4.6.1 (NRD)
+# NVIDIA REAL-TIME DENOISERS v4.7.0 (NRD)
 
 [![Build NRD SDK](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml/badge.svg)](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml)
 
@@ -18,7 +18,8 @@ For quick starting see *[NRD sample](https://github.com/NVIDIAGameWorks/NRDSampl
 Performance on RTX 4080 @ 1440p (native resolution, default denoiser settings):
 - `REBLUR_DIFFUSE_SPECULAR` - 2.45 ms
 - `RELAX_DIFFUSE_SPECULAR` - 2.90 ms
-- `SIGMA_SHADOW` - 0.30 ms
+- `SIGMA_SHADOW` - 0.30 ms (0.24 ms if temporal stabilization is off)
+- `SIGMA_SHADOW_TRANSLUCENCY` - 0.40 ms (0.30 ms if temporal stabilization is off)
 
 Supported signal types:
 - *RELAX*:
@@ -589,7 +590,7 @@ Denoising is not a panacea or miracle. Denoising works best with ray tracing res
     float3 preintegratedBRDF = PreintegratedBRDF( Rf0, N, V, roughness )
     Denoising( specularRadiance * BRDF ) → NRD( specularRadiance * BRDF / preintegratedBRDF ) * preintegratedBRDF
 
-A good approximation for pre-integrated specular BRDF can be found *[here](https://github.com/NVIDIAGameWorks/Falcor/blob/056f7b7c73b69fa8140d211bbf683ddf297a2ae0/Source/Falcor/Rendering/Materials/Microfacet.slang#L213)*.
+A good approximation for pre-integrated specular BRDF can be found *[here](https://github.com/NVIDIAGameWorks/MathLib/blob/407ecd0d1892d12ee1ec98c3d46cbeed73b79a0d/STL.hlsli#L2147)*. Pre-integrated specular BRDF is also referred to as "specular albedo" or "environment BRDF".
 
 ## COMBINED DENOISING OF DIRECT AND INDIRECT LIGHTING
 
@@ -710,7 +711,7 @@ Hair strands tangent vectors *can't* be used as "normals guide" for *NRD* due to
 
 **[NRD]** Hit distances should come from an importance sampling method. But if denoising of AO/SO is needed, AO/SO can come from cos-weighted (or VNDF) sampling in a tradeoff of IQ.
 
-**[NRD]** Low discrepancy sampling (blue noise) helps to have more stable output in 0.5-1 rpp mode. It's a must for REBLUR-based Ambient and Specular Occlusion denoisers and SIGMA.
+**[NRD]** Low discrepancy sampling (blue noise) helps to get more stable output in 0.5-1 rpp mode. It's a must for REBLUR-based Ambient and Specular Occlusion denoisers and SIGMA.
 
 **[NRD]** It's recommended to set `CommonSettings::accumulationMode` to `RESET` for a single frame, if a history reset is needed. If history buffers are recreated or contain garbage, it's recommended to use `CLEAR_AND_RESET` for a single frame. `CLEAR_AND_RESET` is not free because clearing is done in a compute shader. Render target clears on the application side should be prioritized over this solution.
 
@@ -742,13 +743,11 @@ maxAccumulatedFrameNum > maxFastAccumulatedFrameNum > historyFixFrameNum
 
 **[REBLUR]** *REBLUR* expects hit distances in a normalized form. To avoid mismatching, `REBLUR_FrontEnd_GetNormHitDist` must be used for normalization. Normalization parameters should be passed into *NRD* as `HitDistanceParameters` for internal hit distance denormalization. Some tweaking can be needed here, but in most cases default `HitDistanceParameters` works well. *REBLUR* outputs denoised normalized hit distance, which can be used by the application as ambient or specular occlusion (AO & SO) (see unpacking functions from `NRD.hlsli`).
 
-**[REBLUR]** Intensity antilag parameters need to be carefully tuned. The defaults are good but `AntilagIntensitySettings::sensitivityToDarkness` needs to be tuned for a given HDR range. Initial integration should work with intensity antilag turned off.
-
-**[REBLUR]** Even if antilag is off, it's recommended to tune `AntilagIntensitySettings::sensitivityToDarkness`, because it is used for error estimation.
+**[REBLUR/RELAX]** Antilag parameters need to be carefully tuned. Initial integration should be done with disabled antilag.
 
 **[RELAX]** *RELAX* works well with signals produced by *RTXDI* or very clean high RPP signals. The Sweet Home of *RELAX* is *RTXDI* sample. Please, consider getting familiar with this application.
 
-**[SIGMA]** Using "blue" noise can help to avoid shadow shimmering, it works best if the pattern is static on the screen. Additionally, `blurRadiusScale` can be set to `2-4` to mitigate such problems in complicated cases.
+**[SIGMA]** Using "blue" noise can help to avoid shadow shimmering. It works best if the pattern is static on the screen.
 
 **[SIGMA]** *SIGMA_TRANSLUCENT_SHADOW* can be used for shadow denoising from multiple light sources:
 
@@ -798,3 +797,7 @@ Is this a biased solution? If spatial filtering is off - no, because we just reo
 - if shadows overlap, a separate pass is needed to analyze noisy input and classify pixels as *umbra* - *penumbra* (and optionally *empty space*). Raster shadow maps can be used for this if available
 - it is not recommended to mix 1 cd and 100000 cd lights, since FP32 texture will be needed for a weighted sum.
 In this case, it's better to process the sun and other bright light sources separately.
+
+**[SIGMA]** *SIGMA* can be used for multi-light shadow denoising if applied "per light". `SigmaSettings::stabilizationStrength` can be set to `0` to disable temporal history. It provides the following benefits:
+ - light count independent memory usage
+ - no need to manage history buffers for lights
diff --git a/Resources/Version.h b/Resources/Version.h
index 9987937..02ca3ba 100644
--- a/Resources/Version.h
+++ b/Resources/Version.h
@@ -22,7 +22,7 @@ Versioning rules:
 */
 
 #define VERSION_MAJOR                   4
-#define VERSION_MINOR                   6
-#define VERSION_BUILD                   1
+#define VERSION_MINOR                   7
+#define VERSION_BUILD                   0
 
 #define VERSION_STRING STR(VERSION_MAJOR.VERSION_MINOR.VERSION_BUILD encoding=NRD_NORMAL_ENCODING.NRD_ROUGHNESS_ENCODING)
diff --git a/Shaders/Include/Common.hlsli b/Shaders/Include/Common.hlsli
index 504e1d6..176c9ea 100644
--- a/Shaders/Include/Common.hlsli
+++ b/Shaders/Include/Common.hlsli
@@ -129,6 +129,34 @@ Usage:
             printf(__VA_ARGS__)
 #endif
 
+//==================================================================================================================
+// KERNELS
+//==================================================================================================================
+
+static const float3 g_Special6[ 6 ] =
+{
+    // https://www.desmos.com/calculator/e5mttzlg6v
+    float3( -0.50 * sqrt( 3.0 ) , -0.50             , 1.0 ),
+    float3(  0.00               ,  1.00             , 1.0 ),
+    float3(  0.50 * sqrt( 3.0 ) , -0.50             , 1.0 ),
+    float3(  0.00               , -0.30             , 0.3 ),
+    float3(  0.15 * sqrt( 3.0 ) ,  0.15             , 0.3 ),
+    float3( -0.15 * sqrt( 3.0 ) ,  0.15             , 0.3 ),
+};
+
+static const float3 g_Special8[ 8 ] =
+{
+    // https://www.desmos.com/calculator/abaqyvswem
+    float3( -1.00               ,  0.00               , 1.0 ),
+    float3(  0.00               ,  1.00               , 1.0 ),
+    float3(  1.00               ,  0.00               , 1.0 ),
+    float3(  0.00               , -1.00               , 1.0 ),
+    float3( -0.25 * sqrt( 2.0 ) ,  0.25 * sqrt( 2.0 ) , 0.5 ),
+    float3(  0.25 * sqrt( 2.0 ) ,  0.25 * sqrt( 2.0 ) , 0.5 ),
+    float3(  0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 ),
+    float3( -0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 )
+};
+
 //==================================================================================================================
 // SHARED FUNCTIONS
 //==================================================================================================================
diff --git a/Shaders/Include/NRD.hlsli b/Shaders/Include/NRD.hlsli
index 787051a..9c37039 100644
--- a/Shaders/Include/NRD.hlsli
+++ b/Shaders/Include/NRD.hlsli
@@ -8,7 +8,7 @@ distribution of this software and related documentation without an express
 license agreement from NVIDIA CORPORATION is strictly prohibited.
 */
 
-// NRD v4.6
+// NRD v4.7
 
 //=================================================================================================================================
 // INPUT PARAMETERS
@@ -56,7 +56,7 @@ float distanceToOccluder:
     - distance to occluder, must follow the rules:
         - NoL <= 0         - 0 ( it's very important )
         - NoL > 0 ( hit )  - hit distance
-        - NoL > 0 ( miss ) - NRD_FP16_MAX
+        - NoL > 0 ( miss ) - >= NRD_FP16_MAX
 
 float tanOfLightAngularRadius:
     - tan( lightAngularSize * 0.5 )
@@ -747,12 +747,12 @@ float2 SIGMA_FrontEnd_PackShadow( float viewZ, float distanceToOccluder, float t
     r.x = 0.0;
     r.y = _NRD_PackViewZ( viewZ );
 
-    if( distanceToOccluder == NRD_FP16_MAX )
+    if( distanceToOccluder >= NRD_FP16_MAX )
         r.x = NRD_FP16_MAX;
     else if( distanceToOccluder != 0.0 )
     {
-        float distanceToOccluderProj = distanceToOccluder * tanOfLightAngularRadius;
-        r.x = min( distanceToOccluderProj, 32768.0 );
+        float penumbraRadius = distanceToOccluder * tanOfLightAngularRadius;
+        r.x = min( penumbraRadius, 32768.0 );
     }
 
     return r;
@@ -762,7 +762,7 @@ float2 SIGMA_FrontEnd_PackShadow( float viewZ, float distanceToOccluder, float t
 float2 SIGMA_FrontEnd_PackShadow( float viewZ, float distanceToOccluder, float tanOfLightAngularRadius, float3 translucency, out float4 out2 )
 {
     // IN_SHADOW_TRANSLUCENCY
-    out2.x = float( distanceToOccluder == NRD_FP16_MAX );
+    out2.x = float( distanceToOccluder >= NRD_FP16_MAX );
     out2.yzw = saturate( translucency );
 
     // IN_SHADOWDATA
@@ -879,16 +879,11 @@ NRD_SG RELAX_BackEnd_UnpackSh( float4 sh0, float4 sh1 )
 //=================================================================================================================================
 
 // OUT_SHADOW_TRANSLUCENCY => X
-//   SIGMA_SHADOW:
-//      float shadowData = SIGMA_BackEnd_UnpackShadow( shadowData );
-//      shadow = shadowData;
+//   SIGMA_SHADOW / SIGMA_SHADOW_TRANSLUCENCY:
+//      float shadow = SIGMA_BackEnd_UnpackShadow( OUT_SHADOW_TRANSLUCENCY );
 //   SIGMA_SHADOW_TRANSLUCENCY:
-//      float4 shadowData = SIGMA_BackEnd_UnpackShadow( shadowData );
-//      float3 finalShadowCommon = lerp( shadowData.yzw, 1.0, shadowData.x ); // or
-//      float3 finalShadowExotic = shadowData.yzw * shadowData.x; // or
-//      float3 finalShadowMoreExotic = shadowData.yzw;
-// IMPORTANT: use "^ 3" to compensate over-blurring ( it really makes the result closer to the reference )
-#define SIGMA_BackEnd_UnpackShadow( color )  ( color * color * color )
+//      float3 translucentShadow = SIGMA_BackEnd_UnpackShadow( OUT_SHADOW_TRANSLUCENCY ).yzw;
+#define SIGMA_BackEnd_UnpackShadow( shadow ) ( shadow * shadow )
 
 //=================================================================================================================================
 // BACK-END - HIGH QUALITY RESOLVE
diff --git a/Shaders/Include/REBLUR_Common.hlsli b/Shaders/Include/REBLUR_Common.hlsli
index a3b3321..cc0ecb0 100644
--- a/Shaders/Include/REBLUR_Common.hlsli
+++ b/Shaders/Include/REBLUR_Common.hlsli
@@ -14,31 +14,6 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 #define REBLUR_BLUR                                     1
 #define REBLUR_POST_BLUR                                2
 
-// Kernels
-static const float3 g_Special6[ 6 ] =
-{
-    // https://www.desmos.com/calculator/e5mttzlg6v
-    float3( -0.50 * sqrt( 3.0 ) , -0.50             , 1.0 ),
-    float3(  0.00               ,  1.00             , 1.0 ),
-    float3(  0.50 * sqrt( 3.0 ) , -0.50             , 1.0 ),
-    float3(  0.00               , -0.30             , 0.3 ),
-    float3(  0.15 * sqrt( 3.0 ) ,  0.15             , 0.3 ),
-    float3( -0.15 * sqrt( 3.0 ) ,  0.15             , 0.3 ),
-};
-
-static const float3 g_Special8[ 8 ] =
-{
-    // https://www.desmos.com/calculator/abaqyvswem
-    float3( -1.00               ,  0.00               , 1.0 ),
-    float3(  0.00               ,  1.00               , 1.0 ),
-    float3(  1.00               ,  0.00               , 1.0 ),
-    float3(  0.00               , -1.00               , 1.0 ),
-    float3( -0.25 * sqrt( 2.0 ) ,  0.25 * sqrt( 2.0 ) , 0.5 ),
-    float3(  0.25 * sqrt( 2.0 ) ,  0.25 * sqrt( 2.0 ) , 0.5 ),
-    float3(  0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 ),
-    float3( -0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 )
-};
-
 // Storage
 
 #define REBLUR_MAX_ACCUM_FRAME_NUM                      63.0
@@ -360,7 +335,9 @@ float2x3 GetKernelBasis( float3 D, float3 N, float NoD, float roughness = 1.0, f
         B = cross( R, T );
 
         float skewFactor = lerp( 0.5 + 0.5 * roughness, 1.0, NoD );
-        T *= lerp( skewFactor, 1.0, anisoFade );
+        skewFactor = lerp( skewFactor, 1.0, anisoFade );
+
+        T *= skewFactor; // TODO: B /= skewFactor?
     }
 
     return float2x3( T, B );
diff --git a/Shaders/Include/REBLUR_Config.hlsli b/Shaders/Include/REBLUR_Config.hlsli
index bb48def..85c858a 100644
--- a/Shaders/Include/REBLUR_Config.hlsli
+++ b/Shaders/Include/REBLUR_Config.hlsli
@@ -68,7 +68,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 #define REBLUR_HIT_DIST_MIN_WEIGHT( smc )                       ( 0.1 * smc ) // was 0.1
 
 #define REBLUR_MAX_PERCENT_OF_LOBE_VOLUME                       0.75
-#define REBLUR_VIRTUAL_MOTION_PREV_PREV_WEIGHT_ITERATION_NUM    2
+#define REBLUR_VIRTUAL_MOTION_PREV_PREV_WEIGHT_ITERATION_NUM    1
 #define REBLUR_COLOR_CLAMPING_SIGMA_SCALE                       2.0 // using smaller values leads to bias if camera rotates slowly due to reprojection instabilities
 #define REBLUR_FIREFLY_SUPPRESSOR_MAX_RELATIVE_INTENSITY        float2( 10.0, 1.1 )
 #define REBLUR_FIREFLY_SUPPRESSOR_RADIUS_SCALE                  0.1
diff --git a/Shaders/Include/SIGMA_Blur.hlsli b/Shaders/Include/SIGMA_Blur.hlsli
index 7bfb39d..98ecfb4 100644
--- a/Shaders/Include/SIGMA_Blur.hlsli
+++ b/Shaders/Include/SIGMA_Blur.hlsli
@@ -24,11 +24,11 @@ void Preload( uint2 sharedPos, int2 globalPos )
     #if( !defined SIGMA_FIRST_PASS || defined SIGMA_TRANSLUCENT )
         s = gIn_Shadow_Translucency[ globalPos ];
     #else
-        s = float( data.x == NRD_FP16_MAX );
+        s = IsLit( data.x );
     #endif
 
     #ifndef SIGMA_FIRST_PASS
-        s = UnpackShadowSpecial( s );
+        s = SIGMA_BackEnd_UnpackShadow( s );
     #endif
 
     s_Shadow_Translucency[ sharedPos.y ][ sharedPos.x ] = s;
@@ -58,7 +58,8 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
 
     // Copy history
     #ifdef SIGMA_FIRST_PASS
-        gOut_History[ pixelPos ] = gIn_History[ pixelPos ];
+        if( gStabilizationStrength != 0 )
+            gOut_History[ pixelPos ] = gIn_History[ pixelPos ];
     #endif
 
     // Tile-based early out ( potentially )
@@ -70,19 +71,11 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
 
     if( ( tileValue == 0.0 && NRD_USE_TILE_CHECK ) || centerHitDist == 0.0 )
     {
-        gOut_Shadow_Translucency[ pixelPos ] = PackShadow( s_Shadow_Translucency[ smemPos.y ][ smemPos.x ] );
         gOut_Hit_ViewZ[ pixelPos ] = float2( 0.0, viewZ * NRD_FP16_VIEWZ_SCALE );
-
-        return;
-    }
-
-    // Reference
-    #if( SIGMA_REFERENCE == 1 )
         gOut_Shadow_Translucency[ pixelPos ] = PackShadow( s_Shadow_Translucency[ smemPos.y ][ smemPos.x ] );
-        gOut_Hit_ViewZ[ pixelPos ] = float2( centerHitDist * centerSignNoL, viewZ * NRD_FP16_VIEWZ_SCALE );
 
         return;
-    #endif
+    }
 
     // Position
     float3 Xv = STL::Geometry::ReconstructViewPosition( pixelUv, gFrustum, viewZ, gOrthoMode );
@@ -92,8 +85,12 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
     float3 N = normalAndRoughness.xyz;
     float3 Nv = STL::Geometry::RotateVector( gWorldToView, N );
 
+    // Parameters
+    float frustumSize = PixelRadiusToWorld( gUnproject, gOrthoMode, min( gRectSize.x, gRectSize.y ), viewZ ); // TODO: use GetFrustumSize
+    float2 geometryWeightParams = GetGeometryWeightParams( gPlaneDistSensitivity, frustumSize, Xv, Nv, 1.0 );
+
     // Estimate average distance to occluder
-    float sum = 0;
+    float2 sum = 0;
     float hitDist = 0;
     SIGMA_TYPE result = 0;
 
@@ -104,9 +101,8 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
         for( i = 0; i <= BORDER * 2; i++ )
         {
             int2 pos = threadPos + int2( i, j );
-            float2 data = s_Data[ pos.y ][ pos.x ];
 
-            SIGMA_TYPE s = s_Shadow_Translucency[ pos.y ][ pos.x ];
+            float2 data = s_Data[ pos.y ][ pos.x ];
             float h = data.x;
             float signNoL = float( data.x != 0.0 );
             float z = data.y;
@@ -114,50 +110,67 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
             float w = 1.0;
             if( !( i == BORDER && j == BORDER ) )
             {
-                w = GetBilateralWeight( z, viewZ );
-                w *= saturate( 1.0 - abs( centerSignNoL - signNoL ) );
+                float2 uv = pixelUv + float2( i - BORDER, j - BORDER ) * gRectSizeInv;
+                float3 Xvs = STL::Geometry::ReconstructViewPosition( uv, gFrustum, z, gOrthoMode );
+                float NoX = dot( Nv, Xvs );
+
+                w = ComputeWeight( NoX, geometryWeightParams.x, geometryWeightParams.y );
+                w *= GetGaussianWeight( length( float2( i - BORDER, j - BORDER ) / BORDER ) );
+                w *= float( z < gDenoisingRange );
+                w *= float( centerSignNoL == signNoL );
             }
 
-            result += s * w;
-            hitDist += h * float( s.x != 1.0 ) * w;
-            sum += w;
+            SIGMA_TYPE s = s_Shadow_Translucency[ pos.y ][ pos.x ];
+            s = Denanify( w, s );
+
+            float2 ww = w;
+            ww.y *= float( s.x != 1.0 ); // TODO: what if s.x == 1.0, but h < NRD_FP16_MAX?
+            ww.y *= 1.0 / ( 1.0 + h * SIGMA_PENUMBRA_WEIGHT_SCALE ); // prefer smaller penumbra
+
+            result += s * ww.x;
+            hitDist += h * ww.y;
+            sum += ww;
         }
     }
 
-    float invSum = 1.0 / sum;
-    result *= invSum;
-    hitDist *= invSum;
+    result /= sum.x;
+    hitDist /= max( sum.y, NRD_EPS ); // yes, without patching
+
+    float invHitDist = 1.0 / max( hitDist, NRD_EPS );
 
     // Blur radius
     float unprojectZ = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ );
+    float worldRadius = GetKernelRadiusInPixels( hitDist, unprojectZ ) * unprojectZ;
+    worldRadius *= tileValue; // helps to prevent blurring "inside" umbra
+    worldRadius /= SIGMA_SPATIAL_PASSES_NUM;
 
-    float innerShadowRadiusScale = lerp( 0.5, 1.0, result.x );
-    float outerShadowRadiusScale = 1.0; // TODO: find a way to improve penumbra
-    float pixelRadius = innerShadowRadiusScale * outerShadowRadiusScale;
-    pixelRadius *= tileValue;
-    pixelRadius *= hitDist / unprojectZ;
-    pixelRadius *= gBlurRadiusScale;
+    // Tangent basis with anisotropy
+    float3x3 mWorldToLocal = STL::Geometry::GetBasis( Nv );
+    float3 Tv = mWorldToLocal[ 0 ];
+    float3 Bv = mWorldToLocal[ 1 ];
 
-    float centerWeight = STL::Math::LinearStep( 0.9, 1.0, result.x );
-    float penumbraFixWeight = lerp( saturate( pixelRadius / 1.5 ), 1.0, centerWeight ) * result.x;
-    pixelRadius += SIGMA_PENUMBRA_FIX_BLUR_RADIUS_ADDON * penumbraFixWeight; // TODO: improve
+    float3 t = cross( gLightDirectionView.xyz, Nv ); // TODO: add support for other light types to bring proper anisotropic filtering
+    if( length( t ) > 0.001 )
+    {
+        Tv = normalize( t );
+        Bv = cross( Tv, Nv );
 
-    pixelRadius = min( pixelRadius, SIGMA_MAX_PIXEL_RADIUS );
+        float cosa = abs( dot( Nv, gLightDirectionView.xyz ) );
+        float skewFactor = lerp( 0.25, 1.0, cosa );
 
-    // Tangent basis
-    float worldRadius = pixelRadius * unprojectZ;
-    float3x3 mWorldToLocal = STL::Geometry::GetBasis( Nv );
-    float3 Tv = mWorldToLocal[ 0 ] * worldRadius;
-    float3 Bv = mWorldToLocal[ 1 ] * worldRadius;
+        //Tv *= skewFactor; // TODO: needed?
+        Bv /= skewFactor;
+    }
+
+    Tv *= worldRadius;
+    Bv *= worldRadius;
 
     // Random rotation
     float4 rotator = GetBlurKernelRotation( SIGMA_ROTATOR_MODE, pixelPos, gRotator, gFrameIndex );
 
     // Denoising
-    sum = 1.0;
-
-    float frustumSize = PixelRadiusToWorld( gUnproject, gOrthoMode, min( gRectSize.x, gRectSize.y ), viewZ );
-    float2 geometryWeightParams = GetGeometryWeightParams( gPlaneDistSensitivity, frustumSize, Xv, Nv, 1.0 );
+    sum.x = 1.0;
+    sum.y = float( sum.y != 0.0 );
 
     [unroll]
     for( uint n = 0; n < SIGMA_POISSON_SAMPLE_NUM; n++ )
@@ -184,40 +197,45 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
 
         float w = IsInScreenNearest( uv );
         w *= GetGaussianWeight( offset.z );
-        w *= float( z < gDenoisingRange );
         w *= ComputeWeight( NoX, geometryWeightParams.x, geometryWeightParams.y );
-        w *= saturate( 1.0 - abs( centerSignNoL - signNoL ) );
+        w *= float( z < gDenoisingRange );
+        w *= float( centerSignNoL == signNoL );
+
+        // Avoid umbra leaking inside wide penumbra
+        float t = saturate( h * invHitDist );
+        w *= STL::Math::LinearStep( 0.0, 0.1, t );
 
+        // Fetch shadow
         SIGMA_TYPE s;
         #if( !defined SIGMA_FIRST_PASS || defined SIGMA_TRANSLUCENT )
             s = gIn_Shadow_Translucency.SampleLevel( gNearestClamp, uvScaled, 0 );
         #else
-            s = float( h == NRD_FP16_MAX );
+            s = IsLit( h );
         #endif
         s = Denanify( w, s );
 
         #ifndef SIGMA_FIRST_PASS
-            s = UnpackShadowSpecial( s );
+            s = SIGMA_BackEnd_UnpackShadow( s );
         #endif
 
-        // Weight for outer shadow ( to avoid blurring of ~umbra )
-        w *= lerp( 1.0, s.x, centerWeight );
-
         // Accumulate
-        sum += w;
+        float2 ww = w;
+        ww.y *= float( s.x != 1.0 ); // TODO: what if s.x == 1.0, but h < NRD_FP16_MAX?
+        ww.y *= 1.0 / ( 1.0 + h * SIGMA_PENUMBRA_WEIGHT_SCALE ); // prefer smaller penumbra
 
-        result += s * w;
-        hitDist += h * float( s.x != 1.0 ) * w;
+        result += s * ww.x;
+        hitDist += h * ww.y;
+        sum += ww;
     }
 
-    invSum = 1.0 / sum;
-    result *= invSum;
-    hitDist *= invSum;
-
-    hitDist *= tileValue;
-    hitDist *= centerSignNoL;
+    result /= sum.x;
+    hitDist = sum.y == 0.0 ? centerHitDist : hitDist / sum.y;
 
     // Output
+    #ifndef SIGMA_FIRST_PASS
+        if( gStabilizationStrength != 0 )
+    #endif
+            gOut_Hit_ViewZ[ pixelPos ] = float2( hitDist, viewZ * NRD_FP16_VIEWZ_SCALE );
+
     gOut_Shadow_Translucency[ pixelPos ] = PackShadow( result );
-    gOut_Hit_ViewZ[ pixelPos ] = float2( hitDist, viewZ * NRD_FP16_VIEWZ_SCALE );
 }
diff --git a/Shaders/Include/SIGMA_ClassifyTiles.hlsli b/Shaders/Include/SIGMA_ClassifyTiles.hlsli
index 062656e..8fcd422 100644
--- a/Shaders/Include/SIGMA_ClassifyTiles.hlsli
+++ b/Shaders/Include/SIGMA_ClassifyTiles.hlsli
@@ -40,7 +40,7 @@ NRD_EXPORT void NRD_CS_MAIN( uint2 threadPos : SV_GroupThreadId, uint2 tilePos :
 
             bool isInf = viewZ > gDenoisingRange;
             bool isShadow = data.x == 0;
-            bool isLit = data.x == NRD_FP16_MAX;
+            bool isLit = IsLit( data.x );
 
             bool isOpaque = true;
             #ifdef SIGMA_TRANSLUCENT
@@ -52,10 +52,9 @@ NRD_EXPORT void NRD_CS_MAIN( uint2 threadPos : SV_GroupThreadId, uint2 tilePos :
             mask += ( ( ( !isLit && isOpaque ) || isInf || isShadow ) ? 1 : 0 ) << 9;
             mask += ( isInf ? 1 : 0 ) << 18;
 
-            float worldRadius = ( isLit || isInf ) ? 0 : ( data.x * gBlurRadiusScale );
+            float hitDist = ( isLit || isInf ) ? 0 : data.x;
             float unprojectZ = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ );
-            float pixelRadius = worldRadius * STL::Math::PositiveRcp( unprojectZ );
-            pixelRadius = min( pixelRadius, SIGMA_MAX_PIXEL_RADIUS );
+            float pixelRadius = GetKernelRadiusInPixels( hitDist, unprojectZ );
 
             maxRadius = max( pixelRadius, maxRadius );
         }
diff --git a/Shaders/Include/SIGMA_Common.hlsli b/Shaders/Include/SIGMA_Common.hlsli
index 321547e..b779b06 100644
--- a/Shaders/Include/SIGMA_Common.hlsli
+++ b/Shaders/Include/SIGMA_Common.hlsli
@@ -10,17 +10,16 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 
 // Misc
 
-#define PackShadow( s )                                 STL::Math::Sqrt01( s )
-#define UnpackShadow( s )                               ( s * s )
-
-// TODO: shadow unpacking is less trivial
-// 2.0 - closer to reference ( dictated by encoding )
-// 2.0 - s.x - looks better
-#if 0
-    #define UnpackShadowSpecial( s )                    STL::Math::Pow01( s, 2.0 - s.x * ( 1 - SIGMA_REFERENCE ) )
-#else
-    #define UnpackShadowSpecial( s )                    UnpackShadow( s )
-#endif
+#define PackShadow( s )         STL::Math::Sqrt01( s ) // must match "SIGMA_BackEnd_UnpackShadow"
+#define IsLit( h )              ( h >= NRD_FP16_MAX )
+
+float GetKernelRadiusInPixels( float hitDist, float unprojectZ ) // returns "hitDist / unprojectZ" capped at SIGMA_MAX_PIXEL_RADIUS
+{
+    float pixelRadius = hitDist / unprojectZ; // NOTE(review): the code this replaces used "STL::Math::PositiveRcp( unprojectZ )" - plain division presumably assumes "unprojectZ" is never 0, confirm
+    pixelRadius = min( pixelRadius, SIGMA_MAX_PIXEL_RADIUS ); // upper bound only, no lower clamp is applied
+
+    return pixelRadius;
+}
 
 // TODO: move code below to STL.hlsl
 
diff --git a/Shaders/Include/SIGMA_Config.hlsli b/Shaders/Include/SIGMA_Config.hlsli
index 5be9710..f62ef56 100644
--- a/Shaders/Include/SIGMA_Config.hlsli
+++ b/Shaders/Include/SIGMA_Config.hlsli
@@ -16,17 +16,17 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 #define SIGMA_5X5_BLUR_RADIUS_ESTIMATION_KERNEL         1 // helps to improve stability, but adds 10% of overhead
 
 // Switches ( default 0 )
-#define SIGMA_REFERENCE                                 0 // works better with 16-bit precision
 #define SIGMA_SHOW_TILES                                0
+#define SIGMA_SHOW_PENUMBRA_SIZE                        0
 
 // Settings
-#define SIGMA_ROTATOR_MODE                              NRD_PIXEL // NRD_FRAME?
+#define SIGMA_ROTATOR_MODE                              NRD_FRAME
 #define SIGMA_POISSON_SAMPLE_NUM                        8
-#define SIGMA_POISSON_SAMPLES                           g_Poisson8
+#define SIGMA_POISSON_SAMPLES                           g_Special8
 #define SIGMA_MAX_PIXEL_RADIUS                          32.0
-#define SIGMA_MIN_HIT_DISTANCE_OUTPUT                   0.0001
-#define SIGMA_PENUMBRA_FIX_BLUR_RADIUS_ADDON            5.0
+#define SIGMA_PENUMBRA_WEIGHT_SCALE                     10.0
 #define SIGMA_MAX_SIGMA_SCALE                           3.0
+#define SIGMA_SPATIAL_PASSES_NUM                        2
 #define SIGMA_TS_MOTION_MAX_REUSE                       0.11
 
 // Data type
@@ -41,6 +41,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
     NRD_CONSTANT( float4x4, gWorldToView ) \
     NRD_CONSTANT( float4x4, gViewToClip ) \
     NRD_CONSTANT( float4x4, gWorldToClipPrev ) \
+    NRD_CONSTANT( float4, gLightDirectionView ) \
     NRD_CONSTANT( float4, gFrustum ) \
     NRD_CONSTANT( float4, gMvScale ) \
     NRD_CONSTANT( float2, gResourceSizeInv ) \
@@ -58,8 +59,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
     NRD_CONSTANT( float, gUnproject ) \
     NRD_CONSTANT( float, gDenoisingRange ) \
     NRD_CONSTANT( float, gPlaneDistSensitivity ) \
-    NRD_CONSTANT( float, gBlurRadiusScale ) \
-    NRD_CONSTANT( float, gContinueAccumulation ) \
+    NRD_CONSTANT( float, gStabilizationStrength ) \
     NRD_CONSTANT( float, gDebug ) \
     NRD_CONSTANT( float, gSplitScreen ) \
     NRD_CONSTANT( uint, gFrameIndex )
diff --git a/Shaders/Include/SIGMA_SplitScreen.hlsli b/Shaders/Include/SIGMA_SplitScreen.hlsli
index ad75497..7f41e45 100644
--- a/Shaders/Include/SIGMA_SplitScreen.hlsli
+++ b/Shaders/Include/SIGMA_SplitScreen.hlsli
@@ -22,7 +22,11 @@ NRD_EXPORT void NRD_CS_MAIN( int2 pixelPos : SV_DispatchThreadId)
     #ifdef SIGMA_TRANSLUCENT
         s = gIn_Shadow_Translucency[ pixelPos ];
     #else
-        s = float( data.x == NRD_FP16_MAX );
+        s = IsLit( data.x );
+    #endif
+
+    #if( SIGMA_SHOW_PENUMBRA_SIZE == 1 )
+        s.x = PackShadow( data.x );
     #endif
 
     gOut_Shadow_Translucency[ pixelPos ] = s * float( viewZ < gDenoisingRange );
diff --git a/Shaders/Include/SIGMA_TemporalStabilization.hlsli b/Shaders/Include/SIGMA_TemporalStabilization.hlsli
index 582ed11..3fbb865 100644
--- a/Shaders/Include/SIGMA_TemporalStabilization.hlsli
+++ b/Shaders/Include/SIGMA_TemporalStabilization.hlsli
@@ -21,7 +21,7 @@ void Preload( uint2 sharedPos, int2 globalPos )
     s_Data[ sharedPos.y ][ sharedPos.x ] = data;
 
     SIGMA_TYPE s = gIn_Shadow_Translucency[ globalPos ];
-    s = UnpackShadow( s );
+    s = SIGMA_BackEnd_UnpackShadow( s );
 
     s_Shadow_Translucency[ sharedPos.y ][ sharedPos.x ] = s;
 }
@@ -120,7 +120,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
     BicubicFilterNoCorners( saturate( pixelUvPrev ) * gRectSizePrev, gResourceSizeInvPrev, SIGMA_USE_CATROM, gIn_History, history );
 
     history = max( history, 0.0 );
-    history = UnpackShadow( history );
+    history = SIGMA_BackEnd_UnpackShadow( history );
 
     // Clamp history
     float2 a = m1.xx;
@@ -148,16 +148,15 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
     // History weight
     float isInScreen = IsInScreenNearest( pixelUvPrev );
     float motionLength = length( pixelUvPrev - pixelUv );
-    float2 historyWeight = 0.93 * lerp( 1.0, 0.7, ratioNorm ); // use FPS-dependent value, like 0.25 * FPS
+    float2 historyWeight = 0.93 * lerp( 1.0, 0.7, ratioNorm );
     historyWeight = lerp( historyWeight, 0.1, saturate( motionLength / SIGMA_TS_MOTION_MAX_REUSE ) );
     historyWeight *= isInScreen;
-    historyWeight *= gContinueAccumulation;
+    historyWeight *= gStabilizationStrength;
 
     // Reduce history in regions with hard shadows
-    float worldRadius = centerHitDist * gBlurRadiusScale;
     float unprojectZ = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ );
-    float pixelRadius = worldRadius * STL::Math::PositiveRcp( unprojectZ );
-    historyWeight *= STL::Math::LinearStep( 0.0, 3.0, pixelRadius );
+    float pixelRadius = GetKernelRadiusInPixels( centerHitDist, unprojectZ );
+    historyWeight *= STL::Math::LinearStep( 0.0, 0.5, pixelRadius );
 
     // Combine with current frame
     SIGMA_TYPE result;
diff --git a/Source/Denoisers/Sigma_Shadow.hpp b/Source/Denoisers/Sigma_Shadow.hpp
index 14f5b7a..08e5ca0 100644
--- a/Source/Denoisers/Sigma_Shadow.hpp
+++ b/Source/Denoisers/Sigma_Shadow.hpp
@@ -66,17 +66,22 @@ void nrd::InstanceImpl::Add_SigmaShadow(DenoiserData& denoiserData)
         AddDispatch( SIGMA_Shadow_Blur, SIGMA_Blur, USE_MAX_DIMS );
     }
 
-    PushPass("Post-blur");
+    for (int i = 0; i < SIGMA_POST_BLUR_PERMUTATION_NUM; i++)
     {
-        PushInput( AsUint(ResourceType::IN_NORMAL_ROUGHNESS) );
-        PushInput( AsUint(Transient::DATA_1) );
-        PushInput( AsUint(Transient::SMOOTHED_TILES) );
-        PushInput( AsUint(Transient::TEMP_1) );
+        bool isStabilizationEnabled = ( ( ( i >> 0 ) & 0x1 ) != 0 );
+
+        PushPass("Post-blur");
+        {
+            PushInput( AsUint(ResourceType::IN_NORMAL_ROUGHNESS) );
+            PushInput( AsUint(Transient::DATA_1) );
+            PushInput( AsUint(Transient::SMOOTHED_TILES) );
+            PushInput( AsUint(Transient::TEMP_1) );
 
-        PushOutput( AsUint(Transient::DATA_2) );
-        PushOutput( AsUint(Transient::TEMP_2) );
+            PushOutput( AsUint(Transient::DATA_2) );
+            PushOutput( isStabilizationEnabled ? AsUint(Transient::TEMP_2) : AsUint(ResourceType::OUT_SHADOW_TRANSLUCENCY) );
 
-        AddDispatch( SIGMA_Shadow_PostBlur, SIGMA_Blur, 1 );
+            AddDispatch( SIGMA_Shadow_PostBlur, SIGMA_Blur, 1 );
+        }
     }
 
     PushPass("Temporal stabilization");
diff --git a/Source/Denoisers/Sigma_ShadowTranslucency.hpp b/Source/Denoisers/Sigma_ShadowTranslucency.hpp
index 5cbe894..4cd7c9f 100644
--- a/Source/Denoisers/Sigma_ShadowTranslucency.hpp
+++ b/Source/Denoisers/Sigma_ShadowTranslucency.hpp
@@ -68,17 +68,22 @@ void nrd::InstanceImpl::Add_SigmaShadowTranslucency(nrd::DenoiserData& denoiserD
         AddDispatch( SIGMA_ShadowTranslucency_Blur, SIGMA_Blur, USE_MAX_DIMS );
     }
 
-    PushPass("Post-blur");
+    for (int i = 0; i < SIGMA_POST_BLUR_PERMUTATION_NUM; i++)
     {
-        PushInput( AsUint(ResourceType::IN_NORMAL_ROUGHNESS) );
-        PushInput( AsUint(Transient::DATA_1) );
-        PushInput( AsUint(Transient::SMOOTHED_TILES) );
-        PushInput( AsUint(Transient::TEMP_1) );
+        bool isStabilizationEnabled = ( ( ( i >> 0 ) & 0x1 ) != 0 );
+
+        PushPass("Post-blur");
+        {
+            PushInput( AsUint(ResourceType::IN_NORMAL_ROUGHNESS) );
+            PushInput( AsUint(Transient::DATA_1) );
+            PushInput( AsUint(Transient::SMOOTHED_TILES) );
+            PushInput( AsUint(Transient::TEMP_1) );
 
-        PushOutput( AsUint(Transient::DATA_2) );
-        PushOutput( AsUint(Transient::TEMP_2) );
+            PushOutput( AsUint(Transient::DATA_2) );
+            PushOutput( isStabilizationEnabled ? AsUint(Transient::TEMP_2) : AsUint(ResourceType::OUT_SHADOW_TRANSLUCENCY) );
 
-        AddDispatch( SIGMA_ShadowTranslucency_PostBlur, SIGMA_Blur, 1 );
+            AddDispatch( SIGMA_ShadowTranslucency_PostBlur, SIGMA_Blur, 1 );
+        }
     }
 
     PushPass("Temporal stabilization");
diff --git a/Source/InstanceImpl.cpp b/Source/InstanceImpl.cpp
index 58d6943..eeedd45 100644
--- a/Source/InstanceImpl.cpp
+++ b/Source/InstanceImpl.cpp
@@ -286,21 +286,19 @@ nrd::Result nrd::InstanceImpl::SetCommonSettings(const CommonSettings& commonSet
     memcpy(&m_CommonSettings, &commonSettings, sizeof(commonSettings));
 
     // Rotators
-    float4 rndScale = float4(1.0f) + Rand::sf4(&m_FastRandState) * 0.25f;
     float4 rndAngle = Rand::uf4(&m_FastRandState) * DegToRad(360.0f);
-    rndAngle.w = DegToRad( 120.0f * float(m_CommonSettings.frameIndex % 3) );
 
     float ca = Cos( rndAngle.x );
     float sa = Sin( rndAngle.x );
-    m_Rotator_PrePass = float4( ca, sa, -sa, ca ) * rndScale.x;
+    m_Rotator_PrePass = float4( ca, sa, -sa, ca );
 
     ca = Cos( rndAngle.y );
     sa = Sin( rndAngle.y );
-    m_Rotator_Blur = float4( ca, sa, -sa, ca ) * rndScale.y;
+    m_Rotator_Blur = float4( ca, sa, -sa, ca );
 
     ca = Cos( rndAngle.z );
     sa = Sin( rndAngle.z );
-    m_Rotator_PostBlur = float4( ca, sa, -sa, ca ) * rndScale.z;
+    m_Rotator_PostBlur = float4( ca, sa, -sa, ca );
 
     // Main matrices
     m_ViewToClip = float4x4
diff --git a/Source/Sigma.cpp b/Source/Sigma.cpp
index a677fd0..d7b2e40 100644
--- a/Source/Sigma.cpp
+++ b/Source/Sigma.cpp
@@ -17,16 +17,20 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 #include "../Shaders/Resources/SIGMA_TemporalStabilization.resources.hlsli"
 #include "../Shaders/Resources/SIGMA_SplitScreen.resources.hlsli"
 
+// Permutations
+#define SIGMA_POST_BLUR_PERMUTATION_NUM     2
+#define SIGMA_NO_PERMUTATIONS               1
+
 void nrd::InstanceImpl::Update_SigmaShadow(const DenoiserData& denoiserData)
 {
     enum class Dispatch
     {
         CLASSIFY_TILES,
-        SMOOTH_TILES,
-        BLUR,
-        POST_BLUR,
-        TEMPORAL_STABILIZATION,
-        SPLIT_SCREEN,
+        SMOOTH_TILES            = CLASSIFY_TILES + SIGMA_NO_PERMUTATIONS,
+        BLUR                    = SMOOTH_TILES + SIGMA_NO_PERMUTATIONS,
+        POST_BLUR               = BLUR + SIGMA_NO_PERMUTATIONS,
+        TEMPORAL_STABILIZATION  = POST_BLUR + SIGMA_POST_BLUR_PERMUTATION_NUM,
+        SPLIT_SCREEN            = TEMPORAL_STABILIZATION + SIGMA_NO_PERMUTATIONS,
     };
 
     const SigmaSettings& settings = denoiserData.settings.sigma;
@@ -57,12 +61,15 @@ void nrd::InstanceImpl::Update_SigmaShadow(const DenoiserData& denoiserData)
     }
 
     { // POST_BLUR
-        SIGMA_BlurConstants* consts = (SIGMA_BlurConstants*)PushDispatch(denoiserData, AsUint(Dispatch::POST_BLUR));
+        uint32_t passIndex = AsUint(Dispatch::POST_BLUR) + (settings.stabilizationStrength != 0.0f ? 1 : 0);
+        SIGMA_BlurConstants* consts = (SIGMA_BlurConstants*)PushDispatch(denoiserData, passIndex);
         AddSharedConstants_Sigma(settings, consts);
         consts->gRotator = m_Rotator_PostBlur; // TODO: push constant
     }
 
-    { // TEMPORAL_STABILIZATION
+    // TEMPORAL_STABILIZATION
+    if (settings.stabilizationStrength != 0.0f)
+    {
         void* consts = PushDispatch(denoiserData, AsUint(Dispatch::TEMPORAL_STABILIZATION));
         AddSharedConstants_Sigma(settings, consts);
     }
@@ -88,10 +95,13 @@ void nrd::InstanceImpl::AddSharedConstants_Sigma(const SigmaSettings& settings,
     uint16_t tilesW = DivideUp(rectW, 16);
     uint16_t tilesH = DivideUp(rectH, 16);
 
+    float3 lightDirectionView = Rotate(m_WorldToView, float3(settings.lightDirection[0], settings.lightDirection[1], settings.lightDirection[2]));
+
     SharedConstants* consts         = (SharedConstants*)data;
     consts->gWorldToView            = m_WorldToView;
     consts->gViewToClip             = m_ViewToClip;
     consts->gWorldToClipPrev        = m_WorldToClipPrev;
+    consts->gLightDirectionView     = float4(lightDirectionView.x, lightDirectionView.y, lightDirectionView.z, 0.0f);
     consts->gFrustum                = m_Frustum;
     consts->gMvScale                = float4(m_CommonSettings.motionVectorScale[0], m_CommonSettings.motionVectorScale[1], m_CommonSettings.motionVectorScale[2], m_CommonSettings.isMotionVectorInWorldSpace ? 1.0f : 0.0f);
     consts->gResourceSizeInv        = float2(1.0f / float(resourceW), 1.0f / float(resourceH));
@@ -109,8 +119,7 @@ void nrd::InstanceImpl::AddSharedConstants_Sigma(const SigmaSettings& settings,
     consts->gUnproject              = unproject;
     consts->gDenoisingRange         = m_CommonSettings.denoisingRange;
     consts->gPlaneDistSensitivity   = settings.planeDistanceSensitivity;
-    consts->gBlurRadiusScale        = settings.blurRadiusScale;
-    consts->gContinueAccumulation   = m_CommonSettings.accumulationMode != AccumulationMode::CONTINUE ? 0.0f : 1.0f;
+    consts->gStabilizationStrength  = m_CommonSettings.accumulationMode == AccumulationMode::CONTINUE ? settings.stabilizationStrength : 0.0f;
     consts->gDebug                  = m_CommonSettings.debug;
     consts->gSplitScreen            = m_CommonSettings.splitScreen;
     consts->gFrameIndex             = m_CommonSettings.frameIndex;
diff --git a/UPDATE.md b/UPDATE.md
index 2553c96..36e38d7 100644
--- a/UPDATE.md
+++ b/UPDATE.md
@@ -224,3 +224,13 @@ A single NRD instance can now include any combination of denoisers, including re
 - *REBLUR*:
   - `blurRadius` renamed to `maxBlurRadius`
   - exposed `minBlurRadius` with the default value matching older versions
+
+## To v4.7
+
+- *SIGMA*:
+  - removed `blurRadiusScale`
+  - exposed `lightDirection`, which is needed only for directional light sources
+  - exposed `stabilizationStrength` (temporal stabilization pass is disabled if 0)
+  - clarified usage:
+    - `float shadow = SIGMA_BackEnd_UnpackShadow( OUT_SHADOW_TRANSLUCENCY );`
+    - `float3 translucentShadow = SIGMA_BackEnd_UnpackShadow( OUT_SHADOW_TRANSLUCENCY ).yzw;`