Files
UnrealEngine/Engine/Shaders/Private/ScreenSpaceDenoise/SSDCommon.ush
2025-05-18 13:04:45 +08:00

330 lines
9.8 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
SSDCommon.ush: All the code commonly shared between the screen space denoiser shaders.
=============================================================================*/
#pragma once
#if !defined(SPHERICAL_HARMONIC_ORDER)
#define SPHERICAL_HARMONIC_ORDER 0
#endif
#include "../Common.ush"
#include "../SceneTextureParameters.ush"
#include "../ColorSpace.ush"
#include "../SphericalGaussian.ush"
#include "/Engine/Public/WaveBroadcastIntrinsics.ush"
#include "/Engine/Private/Substrate/Substrate.ush"
#if SPHERICAL_HARMONIC_ORDER > 0
#include "../SHCommon.ush"
#endif
#include "SSDPublic.ush"
#include "SSDDefinitions.ush"
#include "SSDMetadata.ush"
//------------------------------------------------------- SHARED CONSTANTS DEPENDING ON CONFIG OF SHADER
// Selects TARGETED_SAMPLE_COUNT (and, for some signals, the metadata buffer layout) from
// CONFIG_SIGNAL_PROCESSING. Compilation fails with an #error if the signal is not listed here.
#if CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_REFLECTIONS
#define TARGETED_SAMPLE_COUNT 1024
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_SHADOW_VISIBILITY_MASK
// A gray scale value encoded into a 16-bit float only has a 10-bit mantissa.
#define TARGETED_SAMPLE_COUNT 1024
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_AO
#define TARGETED_SAMPLE_COUNT 256
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_POLYCHROMATIC_PENUMBRA_HARMONIC
// Polychromatic penumbra is encoded into a 16-bit float that only has a 10-bit mantissa.
#define TARGETED_SAMPLE_COUNT 1024
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_DIFFUSE_INDIRECT_AND_AO
// Indirect diffuse requires a really large spatial kernel to clean the noise.
#define TARGETED_SAMPLE_COUNT 4096
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_DIFFUSE_SPHERICAL_HARMONIC
#define TARGETED_SAMPLE_COUNT 128
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_SSGI
#define TARGETED_SAMPLE_COUNT 128
// #define CONFIG_METADATA_BUFFER_LAYOUT METADATA_BUFFER_LAYOUT_DEPTH_VIEWNORMAL // TODO Guillaume:
//#define CONFIG_USE_VIEW_SPACE 1
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_DIFFUSE_PROBE_HIERARCHY
#define TARGETED_SAMPLE_COUNT 32
// This signal reads its scene metadata from the dedicated compressed metadata textures.
#define CONFIG_METADATA_BUFFER_LAYOUT METADATA_BUFFER_LAYOUT_FED_DEPTH_SHADINGMODEL
#else
#error Unconfigured TARGETED_SAMPLE_COUNT
#endif
// Default: no dedicated metadata buffer, unless a signal above selected one.
#ifndef CONFIG_METADATA_BUFFER_LAYOUT
#define CONFIG_METADATA_BUFFER_LAYOUT METADATA_BUFFER_LAYOUT_DISABLED
#endif
// Default: view space is not used, unless a signal above enabled it.
#ifndef CONFIG_USE_VIEW_SPACE
#define CONFIG_USE_VIEW_SPACE 0
#endif
// View space is only accepted together with the DEPTH_VIEWNORMAL metadata layout.
#if CONFIG_USE_VIEW_SPACE && CONFIG_METADATA_BUFFER_LAYOUT != METADATA_BUFFER_LAYOUT_DEPTH_VIEWNORMAL
#error The all point of using view space is to avoid matrix view -> world computation of the metadata.
#endif
//------------------------------------------------------- SHARED SHADER PARAMETERS
// Global shader parameters declared for every shader that includes this file.
uint2 ViewportMin;
uint2 ViewportMax;
float4 ThreadIdToBufferUV;
float4 DenoiserBufferSizeAndInvSize;
float4 DenoiserBufferBilinearUVMinMax;
// Scale applied to a buffer UV by BufferUVToBufferPixelCoord() to get a pixel coordinate.
float2 BufferUVToOutputPixelPosition;
// Scale (.xy) and bias (.zw) applied to a buffer UV by DenoiserBufferUVToScreenPosition().
float4 SceneBufferUVToScreenPosition;
// Source matrix of GetScreenToViewDistanceMatrix() when CONFIG_USE_VIEW_SPACE is enabled.
float4x4 ScreenToView;
float2 BufferUVBilinearCorrection;
float4 PrevSceneBufferUVToScreenPosition;
// Previous frame depth and GBuffer textures, sampled by SampleCompressedSceneMetadata() when bPrevFrame is set.
Texture2D PrevDepthBuffer;
Texture2D PrevGBufferA;
Texture2D PrevGBufferB;
// There is intentionally no PrevScreenToView, because the whole point is to avoid full matrix computation in inner loops.
// Compressed metadata textures of the current and previous frame. Only the element type of the
// *_0 textures depends on the layout: float for FED_DEPTH_SHADINGMODEL, uint otherwise.
#if CONFIG_METADATA_BUFFER_LAYOUT == METADATA_BUFFER_LAYOUT_FED_DEPTH_SHADINGMODEL
Texture2D<float> CompressedMetadata_0;
Texture2D<uint> CompressedMetadata_1;
Texture2D<float> PrevCompressedMetadata_0;
Texture2D<uint> PrevCompressedMetadata_1;
#else
Texture2D<uint> CompressedMetadata_0;
Texture2D<uint> CompressedMetadata_1;
Texture2D<uint> PrevCompressedMetadata_0;
Texture2D<uint> PrevCompressedMetadata_1;
#endif
// Writable debug target, only declared when DEBUG_OUTPUT is enabled.
#if DEBUG_OUTPUT
RWTexture2D<float4> DebugOutput;
#endif
//------------------------------------------------------- SHARED FUNCTION
/** Reciprocal guarded against non-positive inputs: returns rcp(x) when x > 0, and 0 for anything else. */
float SafeRcp(float x)
{
	// Keep the "x > 0" predicate as is, so that every input failing it (including NaN) yields 0.
	const bool bIsStrictlyPositive = x > 0;
	return bIsStrictlyPositive ? rcp(x) : 0.0;
}
#if CONFIG_USE_VIEW_SPACE
/**
 * Returns a simplified copy of ScreenToView that only keeps its [0][0] and [1][1] terms.
 * [2][2] and [3][3] are forced to 1 and every other term is forced to 0.
 */
float4x4 GetScreenToViewDistanceMatrix()
{
	// Only these two terms of the original matrix survive.
	const float ScaleX = ScreenToView[0][0];
	const float ScaleY = ScreenToView[1][1];

	// All remaining terms are intentionally hard coded constants, so the compiler can remove some of
	// the VALU: that is the whole point of CONFIG_USE_VIEW_SPACE.
	// TODO(Denoiser): off axis projection.
	return float4x4(
		ScaleX, 0.0,    0.0, 0.0,
		0.0,    ScaleY, 0.0, 0.0,
		0.0,    0.0,    1.0, 0.0,
		0.0,    0.0,    0.0, 1.0);
}
#endif
//------------------------------------------------------- SPHERICAL HARMONIC INTERFACE
// Aliases the denoiser's spherical harmonic types to the two band or three band SH vector types,
// according to SPHERICAL_HARMONIC_ORDER. No alias is defined when the order is 0, and any order
// other than 0, 2 or 3 is rejected at compile time.
#if SPHERICAL_HARMONIC_ORDER == 0
// NOP
#elif SPHERICAL_HARMONIC_ORDER == 2
#define FSSDSphericalHarmonic FTwoBandSHVector
#define FSSDSphericalHarmonicRGB FTwoBandSHVectorRGB
#elif SPHERICAL_HARMONIC_ORDER == 3
#define FSSDSphericalHarmonic FThreeBandSHVector
#define FSSDSphericalHarmonicRGB FThreeBandSHVectorRGB
#else
#error Unknown spherical harmonic configuration.
#endif
/**
 * Compute the anisotropy value. The lower the value is, the more anisotropic the sample is.
 * Returns 1 when CONFIG_NORMAL_ANISOTROPY is disabled, otherwise a value that is never below 1/8.
 */
float ComputeAnisotropyInvFactor(FSSDSampleSceneInfos SceneMetadata)
{
	const bool bAnisotropyDisabled = !CONFIG_NORMAL_ANISOTROPY;
	if (bAnisotropyDisabled)
	{
		return 1;
	}

	// How much the surface normal is aligned with the view forward axis, regardless of its sign.
	const float NormalDotForward = dot(GetWorldNormal(SceneMetadata), View.ViewForward);

	// Clamp from below so the factor never gets smaller than 1/8.
	return max(abs(NormalDotForward), rcp(8));
}
/** Returns the radius of a pixel in world space, evaluated at the world depth of the sample. */
float ComputeWorldBluringRadiusCausedByPixelSize(FSSDSampleSceneInfos SceneMetadata)
{
	const float SampleWorldDepth = GetWorldDepth(SceneMetadata);

	// No extra factor is applied here: per the original author's note, the 0.5 (diameter to radius)
	// and the 2.0 (tan of the half field of view only covers half of the pixels) cancel each other.
	return GetDepthPixelRadiusForProjectionType(SampleWorldDepth);
}
/** Convert a world space bluring radius to a bilateral world distance. */
float WorldBluringRadiusToBilateralWorldDistance(float WorldBluringRadius)
{
	// The distance between two pixels is 2 times the radius of a pixel.
	const float PixelSpacingFactor = 2;

	// Need to take into account the furthest pixel of the 3x3 neighborhood, which is the diagonal one.
	const float DiagonalNeighborFactor = sqrt(2);

	// BLURING_ESTIMATION_MULTIPLIER can take into account how much the signal is getting stored
	// into higher resolution.
	const float RadiusToDistance = PixelSpacingFactor * DiagonalNeighborFactor * (BLURING_ESTIMATION_MULTIPLIER);

	return WorldBluringRadius * RadiusToDistance;
}
/** Converts a buffer UV to an integer pixel coordinate by scaling it with BufferUVToOutputPixelPosition. */
uint2 BufferUVToBufferPixelCoord(float2 SceneBufferUV)
{
	const float2 PixelPosition = SceneBufferUV * BufferUVToOutputPixelPosition;

	// The float to uint conversion drops the fractional part of the pixel position.
	return uint2(PixelPosition);
}
/** Converts a buffer UV to a screen position using the scale and bias stored in SceneBufferUVToScreenPosition. */
float2 DenoiserBufferUVToScreenPosition(float2 SceneBufferUV)
{
	const float2 UVScale = SceneBufferUVToScreenPosition.xy;
	const float2 UVBias = SceneBufferUVToScreenPosition.zw;
	return SceneBufferUV * UVScale + UVBias;
}
/**
 * Stores material properties into a FSSDCompressedSceneInfos without any lossy encoding:
 * the floating point values are bit casted as is into VGPR[0..4], and the shading model ID
 * is written to VGPR[5].
 */
FSSDCompressedSceneInfos MaterialToCompressedSceneMetadata(float Depth, float3 WorldNormal, float Roughness, uint ShadingID)
{
	FSSDCompressedSceneInfos Packed = CreateCompressedSceneInfos();

	// Bit cast the three components of the normal at once.
	const uint3 NormalBits = asuint(WorldNormal);

	Packed.VGPR[0] = asuint(Depth);
	Packed.VGPR[1] = NormalBits.x;
	Packed.VGPR[2] = NormalBits.y;
	Packed.VGPR[3] = NormalBits.z;
	Packed.VGPR[4] = asuint(Roughness);
	Packed.VGPR[5] = ShadingID;

	return Packed;
}
/**
 * Fetches the compressed scene metadata of a pixel, for the current or the previous frame.
 * The function body is selected at compile time by CONFIG_METADATA_BUFFER_LAYOUT:
 *  - METADATA_BUFFER_LAYOUT_DISABLED: rebuilds the metadata from the scene textures (GBuffer or Substrate top layer).
 *  - METADATA_BUFFER_LAYOUT_FED_DEPTH_SHADINGMODEL: loads VGPR[0] and VGPR[1] from the two compressed metadata textures.
 *  - any other layout: loads VGPR[0] only, from the first compressed metadata texture.
 *
 * @param bPrevFrame Whether to read the previous frame's resources instead of the current frame's.
 * @param BufferUV Buffer UV of the pixel; only used by the METADATA_BUFFER_LAYOUT_DISABLED body.
 * @param PixelCoord Pixel coordinate; used by the texture Load() calls (Substrate top layer and compressed metadata).
 * @return The compressed scene metadata of the pixel.
 */
FSSDCompressedSceneInfos SampleCompressedSceneMetadata(
const bool bPrevFrame,
float2 BufferUV, uint2 PixelCoord)
#if CONFIG_METADATA_BUFFER_LAYOUT == METADATA_BUFFER_LAYOUT_DISABLED
{
// NOTE(review): the macro is spelled SUBTRATE_ (not SUBSTRATE_); confirm it matches the engine's definition before renaming.
#if SUBTRATE_GBUFFER_FORMAT==1
// Depth comes from the previous frame depth buffer or from the current scene textures.
float SceneDepth = 0;
if (bPrevFrame)
{
SceneDepth = ConvertFromDeviceZ(PrevDepthBuffer.SampleLevel(GlobalPointClampedSampler, BufferUV, 0).r);
}
else
{
SceneDepth = ConvertFromDeviceZ(SampleDeviceZFromSceneTextures(BufferUV));
}
// SUBSTRATE_TODO: Manage bPrevFrame
// The top layer data is loaded from Substrate.TopLayerTexture whatever bPrevFrame is, so only the depth honors bPrevFrame here.
const FSubstrateTopLayerData TopLayerData = SubstrateUnpackTopLayerData(Substrate.TopLayerTexture.Load(uint3(PixelCoord, 0)));
return MaterialToCompressedSceneMetadata(SceneDepth, TopLayerData.WorldNormal, TopLayerData.Roughness, IsSubstrateMaterial(TopLayerData) ? SHADINGMODELID_SUBSTRATE : SHADINGMODELID_UNLIT);
#else // SUBTRATE_GBUFFER_FORMAT==1
FGBufferData GBufferData;
if (bPrevFrame)
{
// Only depth, GBufferA and GBufferB are available for the previous frame: they are point sampled,
// and every other DecodeGBufferData() input below is fed with a constant.
float DeviceZ = PrevDepthBuffer.SampleLevel(GlobalPointClampedSampler, BufferUV, 0).r;
uint CustomStencil = 0;
float CustomNativeDepth = 0;
float SceneDepth = ConvertFromDeviceZ(DeviceZ);
float4 GBufferA = PrevGBufferA.SampleLevel(GlobalPointClampedSampler, BufferUV, 0);
float4 GBufferB = PrevGBufferB.SampleLevel(GlobalPointClampedSampler, BufferUV, 0);
float4 GBufferC = 0.0;
float4 GBufferD = 0.0;
float4 GBufferE = 0.0;
float4 GBufferF = 0.5f;
float4 GBufferVelocity = 0.0;
bool bGetNormalizedNormal = false;
GBufferData = DecodeGBufferData(
GBufferA, GBufferB, GBufferC, GBufferD, GBufferE, GBufferF, GBufferVelocity,
CustomNativeDepth, CustomStencil, SceneDepth, bGetNormalizedNormal, CheckerFromSceneColorUV(BufferUV));
}
else
{
// Current frame: decode the full GBuffer from the scene textures.
GBufferData = GetGBufferDataFromSceneTextures(BufferUV);
}
return MaterialToCompressedSceneMetadata(GBufferData.Depth, GBufferData.WorldNormal, GBufferData.Roughness, GBufferData.ShadingModelID);
#endif // SUBTRATE_GBUFFER_FORMAT==1
}
#elif CONFIG_METADATA_BUFFER_LAYOUT == METADATA_BUFFER_LAYOUT_FED_DEPTH_SHADINGMODEL
{
FSSDCompressedSceneInfos CompressedMetadata = CreateCompressedSceneInfos();
int3 Coord = int3(PixelCoord, /* MipLevel = */ 0);
// The *_0 textures hold a float in this layout, hence the bit cast to uint; the *_1 textures already hold a uint.
if (bPrevFrame)
{
CompressedMetadata.VGPR[0] = asuint(PrevCompressedMetadata_0.Load(Coord));
CompressedMetadata.VGPR[1] = PrevCompressedMetadata_1.Load(Coord);
}
else
{
CompressedMetadata.VGPR[0] = asuint(CompressedMetadata_0.Load(Coord));
CompressedMetadata.VGPR[1] = CompressedMetadata_1.Load(Coord);
}
return CompressedMetadata;
}
#else
{
FSSDCompressedSceneInfos CompressedMetadata = CreateCompressedSceneInfos();
int3 Coord = int3(PixelCoord, /* MipLevel = */ 0);
// Only the first compressed metadata texture is read for the remaining layouts.
if (bPrevFrame)
{
CompressedMetadata.VGPR[0] = PrevCompressedMetadata_0.Load(Coord);
}
else
{
CompressedMetadata.VGPR[0] = CompressedMetadata_0.Load(Coord);
}
return CompressedMetadata;
}
#endif