Files
UnrealEngine/Engine/Shaders/Private/DiaphragmDOF/DOFGatherPass.usf
2025-05-18 13:04:45 +08:00

465 lines
16 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
DiaphragmDOF/DOFGatherPass.usf: gather pass entry point for diaphragm DOF.
=============================================================================*/
#include "DOFGatherCommon.ush"
#include "DOFGatherAccumulator.ush"
#include "DOFGatherKernel.ush"
#include "DOFGatherTileSuggest.ush"
//------------------------------------------------------- COMPILE TIME CONSTANTS
// The thread group is square, with COC_TILE_SIZE threads on each side.
#define THREADGROUP_SIZEX (COC_TILE_SIZE)
#define THREADGROUP_SIZEY (THREADGROUP_SIZEX)
#define THREADGROUP_TOTALSIZE (THREADGROUP_SIZEX * THREADGROUP_SIZEY)
//------------------------------------------------------- PARAMETERS
// Frame's temporal offset in pixels.
float2 TemporalJitterPixels;
// Mip bias for the gathering kernel.
float MipBias;
// Output viewport; .zw is compared against the output pixel coordinate in StoreAccumulatorOutput(),
// and any pixel at or beyond it on either axis is not written.
uint4 ViewportRect;
// Scale applied to (DispatchThreadId + 0.5) to get the input buffer UV of the thread's pixel center.
float2 DispatchThreadIdToInputBufferUV;
// Forwarded as is to FGatherInputParameters::MaxRecombineAbsCocRadius.
float MaxRecombineAbsCocRadius;
// Affine transformation to consider the CocRadius.
float2 ConsiderCocRadiusAffineTransformation0;
float2 ConsiderCocRadiusAffineTransformation1;
float2 ConsiderAbsCocRadiusAffineTransformation;
// Scale applied to the input buffer center UV to recover the output pixel coordinate to store to.
float2 InputBufferUVToOutputPixel;
// Coc tile classification buffers, read through LoadCocTile() with the COC_TILE_LAYOUT_FGD_SEP_BGD layout.
Texture2D TileClassification_Foreground;
Texture2D TileClassification_Background;
//------------------------------------------------------- OUTPUTS
// Color of the processed layer; without CONFIG_DOF_ALPHA the layer's alpha/weight/opacity is packed in .a.
RWTexture2D<float4> ConvolutionOutput_SceneColor;
// Only written under CONFIG_DOF_ALPHA, and not by the slight out of focus layer.
RWTexture2D<float4> ConvolutionOutput_SeparateAlpha;
#if CONFIG_GENERATE_SCATTER_OCCLUSION_BUFFER
// .x = background Coc radius average, .y = background Coc radius variance floored at 1.
// Left at zero when the processed layer is not LAYER_PROCESSING_BACKGROUND_ONLY.
RWTexture2D<float4> ScatterOcclusionOutput;
#endif
//------------------------------------------------------- FUNCTION
/** Store the accumulator output to the output RWTextures.
 *
 * This is implemented as a separate function, called from the different branches of GatherMainCS,
 * to help the compiler reduce VGPR pressure, which is critical for this amount of samples.
 *
 * Nothing is written when the pixel is at or beyond ViewportRect.zw on either axis.
 *
 * @param OutputPixelPosition  Coordinate of the texel to write in every output texture.
 * @param AccumulatorOutput    Resolved gather result; only the members of the layer selected by
 *                             DIM_LAYER_PROCESSING are read.
 */
void StoreAccumulatorOutput(in uint2 OutputPixelPosition, in FAccumulatorOutput AccumulatorOutput)
{
	// Threads outside of the viewport have nothing to output.
	if (any(OutputPixelPosition >= ViewportRect.zw))
	{
		return;
	}

	float4 OutScatterOcclusion = 0;

	#if DIM_LAYER_PROCESSING == LAYER_PROCESSING_BACKGROUND_ONLY && CONFIG_GENERATE_SCATTER_OCCLUSION_BUFFER
	{
		float CocRadiusAvg = AccumulatorOutput.BackgroundCocAvgAndSquareAvg.x;

		// Variance = E[x^2] - E[x]^2. Clamp to zero because numerical errors can make it slightly negative.
		float CocRadiusVariance = max(0.0,
			AccumulatorOutput.BackgroundCocAvgAndSquareAvg.y -
			AccumulatorOutput.BackgroundCocAvgAndSquareAvg.x * AccumulatorOutput.BackgroundCocAvgAndSquareAvg.x);

		OutScatterOcclusion.x = CocRadiusAvg;

		// NOTE(review): the stored variance is floored at 1; the reason is not visible from this file.
		OutScatterOcclusion.y = max(CocRadiusVariance, 1);
	}
	#endif

	// Only output what we need, trusting the compiler to compile out unnecessary stuff.
	#if CONFIG_DOF_ALPHA
	{
		// Scene color keeps its own alpha channel, so the layer's alpha goes to the separate target.
		#if DIM_LAYER_PROCESSING == LAYER_PROCESSING_FOREGROUND_ONLY
			ConvolutionOutput_SceneColor[OutputPixelPosition] = AccumulatorOutput.ForegroundColor * AccumulatorOutput.ForegroundAlpha;
			ConvolutionOutput_SeparateAlpha[OutputPixelPosition] = AccumulatorOutput.ForegroundAlpha;
		#elif DIM_LAYER_PROCESSING == LAYER_PROCESSING_FOREGROUND_HOLE_FILLING
			ConvolutionOutput_SceneColor[OutputPixelPosition] = AccumulatorOutput.ForegroundHoleFillingColor;
			ConvolutionOutput_SeparateAlpha[OutputPixelPosition] = AccumulatorOutput.ForegroundHoleFillingAlpha;
		#elif DIM_LAYER_PROCESSING == LAYER_PROCESSING_BACKGROUND_ONLY
			ConvolutionOutput_SceneColor[OutputPixelPosition] = AccumulatorOutput.BackgroundColor;
			ConvolutionOutput_SeparateAlpha[OutputPixelPosition] = AccumulatorOutput.BackgroundWeight > 0 ? 1 : 0;
		#elif DIM_LAYER_PROCESSING == LAYER_PROCESSING_SLIGHT_OUT_OF_FOCUS
			// This layer does not write the separate alpha target: opacity is packed in the color's .a.
			ConvolutionOutput_SceneColor[OutputPixelPosition] = float4(
				AccumulatorOutput.SlightFocusColor.rgb,
				AccumulatorOutput.SlightFocusOpacity);
		#else
			#error Unknown layer processing.
		#endif
	}
	#else // !CONFIG_DOF_ALPHA
	{
		// Single target: rgb holds the layer's color, .a its alpha / binary weight / opacity.
		#if DIM_LAYER_PROCESSING == LAYER_PROCESSING_FOREGROUND_ONLY
			ConvolutionOutput_SceneColor[OutputPixelPosition] = float4(
				AccumulatorOutput.ForegroundColor.rgb * AccumulatorOutput.ForegroundAlpha,
				AccumulatorOutput.ForegroundAlpha);
		#elif DIM_LAYER_PROCESSING == LAYER_PROCESSING_FOREGROUND_HOLE_FILLING
			ConvolutionOutput_SceneColor[OutputPixelPosition] = float4(
				AccumulatorOutput.ForegroundHoleFillingColor.rgb,
				AccumulatorOutput.ForegroundHoleFillingAlpha);
		#elif DIM_LAYER_PROCESSING == LAYER_PROCESSING_BACKGROUND_ONLY
			ConvolutionOutput_SceneColor[OutputPixelPosition] = float4(
				AccumulatorOutput.BackgroundColor.rgb,
				AccumulatorOutput.BackgroundWeight > 0 ? 1 : 0);
		#elif DIM_LAYER_PROCESSING == LAYER_PROCESSING_SLIGHT_OUT_OF_FOCUS
			ConvolutionOutput_SceneColor[OutputPixelPosition] = float4(
				AccumulatorOutput.SlightFocusColor.rgb,
				AccumulatorOutput.SlightFocusOpacity);
		#else
			#error Unknown layer processing.
		#endif
	}
	#endif // !CONFIG_DOF_ALPHA

	#if CONFIG_GENERATE_SCATTER_OCCLUSION_BUFFER
		ScatterOcclusionOutput[OutputPixelPosition] = OutScatterOcclusion;
	#endif
}
//------------------------------------------------------- LDS
// Number of threads of the group that can neither skip gathering nor use fast gathering.
// Only touched by GatherMainCS's fallback group vote, i.e. when GROUP_SIZE_IS_LARGER_THAN_COC_TILE_SIZE
// is set and COMPILER_SUPPORTS_WAVE_VOTE is not; the group fast gathers only if it stays at zero.
groupshared uint RequiresAccurateGather;
//------------------------------------------------------- ENTRY POINT
/** Gather pass entry point for the layer selected by DIM_LAYER_PROCESSING.
 *
 * Each thread sets up its gather parameters, loads the gathering suggestion of its Coc tile, and
 * then takes exactly one of three paths, each ending in StoreAccumulatorOutput():
 *  - skip gathering entirely when the tile suggestion allows an early return;
 *  - fast gathering (only compiled with CONFIG_ENABLE_FAST_ACCUMULATOR && DEBUG_NO_FAST_ACCUMULATOR == 0);
 *  - full gathering with FGatherAccumulator.
 *
 * @param DispatchThreadId  Thread coordinate within the dispatch; drives the UVs, the random
 *                          signals and (on the skip path) the output pixel.
 * @param GroupId           Thread group coordinate; used as the Coc tile coordinate unless
 *                          GROUP_SIZE_IS_LARGER_THAN_COC_TILE_SIZE is set.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void GatherMainCS(
	uint2 DispatchThreadId : SV_DispatchThreadID,
	uint2 GroupId : SV_GroupID)
{
	float4 Debug = 0;

	// Get output's UVs.
	float2 ViewportUV = (DispatchThreadId + 0.5) * ViewportSize.zw;
	float2 InputBufferCenterUV = (DispatchThreadId + 0.5) * DispatchThreadIdToInputBufferUV;

	// Larger convolution that would have been gathered at lower resolution. It is left zeroed here,
	// and is only used to seed the accumulators and the skip path's output.
	FLargerConvolution LargerConvolution;
	LargerConvolution.Color = 0;
	LargerConvolution.Weight = 0;
	LargerConvolution.CocAvgAndSquareAvg = 0;

	// Sets the gather input compile time parameters.
	FGatherInputParameters GatherParameters;
	{
		GatherParameters.MaxRingCount = DIM_GATHER_RING_COUNT;
		GatherParameters.bFastGathering = false;
		GatherParameters.KernelSamplingDensityMode = KERNEL_DENSITY_CONSTANT;
		GatherParameters.DensityChangeRingCount = GatherParameters.MaxRingCount / 2;
		GatherParameters.MaxDensityChangeCount = CONFIG_MAX_SAMPLING_DENSITY_CHANGES;

		// TODO: variable number of ring.
		GatherParameters.RingCount = GatherParameters.MaxRingCount;

		GatherParameters.ConsiderCocRadiusAffineTransformation0 = kContantlyPassingAffineTransformation;
		GatherParameters.ConsiderCocRadiusAffineTransformation1 = kContantlyPassingAffineTransformation;
		GatherParameters.ConsiderAbsCocRadiusAffineTransformation = kContantlyPassingAffineTransformation;
	}

	// Setups what Coc radius are considered.
	#if 1
	{
		#if DIM_LAYER_PROCESSING == LAYER_PROCESSING_FOREGROUND_HOLE_FILLING
		{
			// Hole filling drives the signed Coc slot, so it must read the like-named shader parameter
			// and not the absolute Coc one that the other layers use.
			GatherParameters.ConsiderCocRadiusAffineTransformation0 = ConsiderCocRadiusAffineTransformation0;
		}
		#else
		{
			GatherParameters.ConsiderAbsCocRadiusAffineTransformation = ConsiderAbsCocRadiusAffineTransformation;
		}
		#endif
	}
	#else
	{
		GatherParameters.ConsiderCocRadiusAffineTransformation0 = ConsiderCocRadiusAffineTransformation0;
		GatherParameters.ConsiderCocRadiusAffineTransformation1 = ConsiderCocRadiusAffineTransformation1;
		GatherParameters.ConsiderAbsCocRadiusAffineTransformation = ConsiderAbsCocRadiusAffineTransformation;
	}
	#endif

	// Sets the per wave gather parameters
	{
		GatherParameters.DispatchThreadIdToInputBufferUV = DispatchThreadIdToInputBufferUV;
		GatherParameters.MaxRecombineAbsCocRadius = MaxRecombineAbsCocRadius;
	}

	// Sets the per wave line gather parameters
	{
		GatherParameters.InputBufferCenterUV = InputBufferCenterUV;
		GatherParameters.ViewportUV = ViewportUV;

		// Generate random signals
		GatherParameters.Random[0] = InterleavedGradientNoise(DispatchThreadId, 0);
		GatherParameters.Random[1] = InterleavedGradientNoise(DispatchThreadId, 1);
	}

	// Sample Coc tile texture.
	FGatheringTileSuggestion TileSuggestion;
	{
		// Actually sample the Coc tile buffer.
		#if GROUP_SIZE_IS_LARGER_THAN_COC_TILE_SIZE
			// A group spans several tiles, so the tile has to be derived per thread.
			const uint2 TilePos = DispatchThreadId / COC_TILE_SIZE;
		#else
			const uint2 TilePos = GroupId;
		#endif

		const FCocTileSample CocTileInfos = LoadCocTile(COC_TILE_LAYOUT_FGD_SEP_BGD, TileClassification_Foreground, TileClassification_Background, int2(TilePos));

		TileSuggestion = InferGatherTileSuggestion(CocTileInfos, /* LayerProcessing = */ DIM_LAYER_PROCESSING);
	}

	// Setup first gathering's radius.
	{
		// Kernel radius is simply set to the maximum absolute Coc size.
		GatherParameters.ClosestCocRadius = TileSuggestion.ClosestCocRadius;
		GatherParameters.KernelRadius = TileSuggestion.MaxCocRadiusAbs;
		GatherParameters.MinIntersectableCocRadiusAbs = TileSuggestion.MinIntersectableCocRadiusAbs;

		#if DIM_LAYER_PROCESSING == LAYER_PROCESSING_SLIGHT_OUT_OF_FOCUS
		{
			// Dynamically control number rings.
			GatherParameters.KernelSamplingDensityMode = KERNEL_DENSITY_CONSTANT_DYNAMIC_RING_COUNT;
			GatherParameters.MaxRingCount = round(min(TileSuggestion.MaxCocRadiusAbs, SlightOutOfFocusRadiusBoundary)); // shader parameter for maximum number of rings
		}
		#endif

		#if CONFIG_SGPR_HINTS
		{
			GatherParameters.ClosestCocRadius = ToScalarMemory(GatherParameters.ClosestCocRadius);
			GatherParameters.KernelRadius = ToScalarMemory(GatherParameters.KernelRadius);
			GatherParameters.MinIntersectableCocRadiusAbs = ToScalarMemory(GatherParameters.MinIntersectableCocRadiusAbs);
			GatherParameters.MaxRingCount = ToScalarMemory(GatherParameters.MaxRingCount);
		}
		#endif
	}

	// Whether can entirely skip gathering.
	bool bSkipGathering = TileSuggestion.bCanEarlyReturn;

	// Whether can switch to fast gathering.
	#if CONFIG_ENABLE_FAST_ACCUMULATOR && DEBUG_NO_FAST_ACCUMULATOR == 0
	bool bDoFastGathering;
	{
		#if !GROUP_SIZE_IS_LARGER_THAN_COC_TILE_SIZE
			bDoFastGathering = TileSuggestion.bCanDofastGathering;
		#elif COMPILER_SUPPORTS_WAVE_VOTE
			// Threads that skip gathering never reach the fast path, so they do not veto it.
			bDoFastGathering = WaveActiveAllTrue(TileSuggestion.bCanDofastGathering || bSkipGathering);
		#else
			// Fallback group vote through LDS: count the threads that need the accurate gather.
			// The first barrier publishes the reset, the second one the final count.
			RequiresAccurateGather = 0;
			GroupMemoryBarrierWithGroupSync();
			{
				uint Ignored;
				InterlockedAdd(RequiresAccurateGather, uint(!TileSuggestion.bCanDofastGathering && !bSkipGathering), Ignored);
			}
			GroupMemoryBarrierWithGroupSync();
			bDoFastGathering = (RequiresAccurateGather == 0);
		#endif
	}
	#endif

	// Early return if there is no convolution to do.
	BRANCH
	if (bSkipGathering)
	{
		uint2 OutputPixelPosition = DispatchThreadId;

		FAccumulatorOutput AccumulatorOutput = CreateAccumulatorOutput();

		// TODO: that is not fine for LARGEST_RING_FIRST == 0
		AccumulatorOutput.BackgroundColor = LargerConvolution.Color;
		AccumulatorOutput.BackgroundWeight = LargerConvolution.Weight;
		AccumulatorOutput.BackgroundCocAvgAndSquareAvg = LargerConvolution.CocAvgAndSquareAvg;

		StoreAccumulatorOutput(OutputPixelPosition, AccumulatorOutput);

		#if DEBUG_OUTPUT
		{
			DebugOutput[OutputPixelPosition] = Debug;
		}
		#endif
	}
	// Do fast gathering because all Coc in the neighborhood are same.
	#if CONFIG_ENABLE_FAST_ACCUMULATOR && DEBUG_NO_FAST_ACCUMULATOR == 0
	else if (bDoFastGathering)
	{
		// That is sad there is not function templating, so set at compile time in the gather parameters that
		// we are actually fast gathering.
		GatherParameters.bFastGathering = true;
		GatherParameters.KernelSamplingDensityMode = CONFIG_FAST_ACCUMULATOR_KERNEL_DENSITY;
		GatherParameters.MaxRingCount = CONFIG_FAST_ACCUMULATOR_RING_COUNT;
		GatherParameters.RingCount = GatherParameters.MaxRingCount;

		// Create an accumulator to make the compiler happy, but should be entirely compiled out.
		FGatherAccumulator UnusedAccumulator = CreateGatherAccumulator(GatherParameters, LargerConvolution);
		FAccumulatorOutput UnusedAccumulatorOutput = CreateAccumulatorOutput();

		// Gather the samples.
		FFastAccumulatorOutput FastAccumulatorOutput;
		GatherSamplesAndResolve(GatherParameters, UnusedAccumulator, UnusedAccumulatorOutput, FastAccumulatorOutput);

		// Ignore Accumulator, just return the fast accumulator.
		FAccumulatorOutput AccumulatorOutput = CreateAccumulatorOutput();
		#if DIM_LAYER_PROCESSING == LAYER_PROCESSING_FOREGROUND_ONLY
		{
			AccumulatorOutput.ForegroundColor = FastAccumulatorOutput.Color;

			// Alpha ramps from 0 to 1 over the last unit of kernel radius below MaxRecombineAbsCocRadius.
			AccumulatorOutput.ForegroundAlpha = saturate(GatherParameters.KernelRadius - (GatherParameters.MaxRecombineAbsCocRadius - 1.0));
		}
		#elif DIM_LAYER_PROCESSING == LAYER_PROCESSING_BACKGROUND_ONLY
		{
			AccumulatorOutput.BackgroundColor = FastAccumulatorOutput.Color;
			AccumulatorOutput.BackgroundWeight = 1;
			AccumulatorOutput.BackgroundCocAvgAndSquareAvg.x = GatherParameters.KernelRadius;
			AccumulatorOutput.BackgroundCocAvgAndSquareAvg.y = 0;
		}
		#else
			#error Fast gathering is not supported.
		#endif

		#if DEBUG_FAST_ACCUMULATOR
			// Tint fast gathered pixels green.
			AccumulatorOutput.ForegroundColor.rgb *= float3(0.5, 1, 0.5);
			AccumulatorOutput.BackgroundColor.rgb *= float3(0.5, 1, 0.5);
		#endif

		#if 1 // Lower VGPR footprint.
			uint2 OutputPixelPosition = uint2(GatherParameters.InputBufferCenterUV * InputBufferUVToOutputPixel);
		#else
			uint2 OutputPixelPosition = DispatchThreadId;
		#endif

		Debug = float4(0, 1, 0, 0);

		StoreAccumulatorOutput(OutputPixelPosition, AccumulatorOutput);

		#if DEBUG_OUTPUT
		{
			DebugOutput[OutputPixelPosition] = Debug;
		}
		#endif
	}
	#endif // CONFIG_ENABLE_FAST_ACCUMULATOR && DEBUG_NO_FAST_ACCUMULATOR == 0
	// Do gathering with FGatherAccumulator.
	else
	{
		// Drives kernel sampling density.
		#if CONFIG_KERNEL_SAMPLING_DENSITY_DRIVER == 0
			// NOP
		#elif CONFIG_KERNEL_SAMPLING_DENSITY_DRIVER == 1
		{
			#if DIM_LAYER_PROCESSING != LAYER_PROCESSING_BACKGROUND_ONLY
				#error Kernel sampling density driver only available for background.
			#endif

			#if CONFIG_LAYER_GATHERING
				GatherParameters.KernelSamplingDensityMode = KERNEL_DENSITY_RECURSIVE_DISKS;
			#else
				GatherParameters.KernelSamplingDensityMode = KERNEL_DENSITY_HIGHER_IN_CENTER_DISK;
			#endif
		}
		#endif

		// Create, use and resolve gather accumulator.
		FAccumulatorOutput AccumulatorOutput = CreateAccumulatorOutput();
		uint DebugGatherDensityChanges;
		{
			FFastAccumulatorOutput UnusedFastAccumulatorOutput;
			FGatherAccumulator FirstAccumulator = CreateGatherAccumulator(GatherParameters, LargerConvolution);
			DebugGatherDensityChanges = GatherSamplesAndResolve(GatherParameters, FirstAccumulator, AccumulatorOutput, UnusedFastAccumulatorOutput);
		}

		#if DEBUG_FAST_ACCUMULATOR
		{
			// Tint accurately gathered pixels red, once more per sampling density change.
			#if CONFIG_KERNEL_SAMPLING_DENSITY_DRIVER > 0
				UNROLL_N(CONFIG_MAX_SAMPLING_DENSITY_CHANGES)
				for (uint i = 0; i < CONFIG_MAX_SAMPLING_DENSITY_CHANGES; i++)
				{
					if (i < DebugGatherDensityChanges)
					{
						AccumulatorOutput.ForegroundColor.rgb *= float3(1, 0.5, 0.5);
						AccumulatorOutput.BackgroundColor.rgb *= float3(1, 0.5, 0.5);
					}
				}
			#endif

			AccumulatorOutput.ForegroundColor.rgb *= float3(1, 0.5, 0.5);
			AccumulatorOutput.BackgroundColor.rgb *= float3(1, 0.5, 0.5);
		}
		#endif

		#if 1 // Lower VGPR footprint.
			uint2 OutputPixelPosition = uint2(GatherParameters.InputBufferCenterUV * InputBufferUVToOutputPixel);
		#else
			uint2 OutputPixelPosition = DispatchThreadId;
		#endif

		// Selects what gets written to the debug output.
		#if 1
		{
			#if DIM_LAYER_PROCESSING == LAYER_PROCESSING_FOREGROUND_ONLY
			{
			}
			#elif DIM_LAYER_PROCESSING == LAYER_PROCESSING_FOREGROUND_HOLE_FILLING
			{
			}
			#elif DIM_LAYER_PROCESSING == LAYER_PROCESSING_BACKGROUND_ONLY
			{
			}
			#elif DIM_LAYER_PROCESSING == LAYER_PROCESSING_SLIGHT_OUT_OF_FOCUS
			{
				Debug = float4(GatherParameters.MaxRingCount, 0, 0, 0);
			}
			#else
				#error Unknown layer processing.
			#endif
		}
		#elif 1
		{
			Debug = float4(GatherParameters.KernelRadius, 0, 0, 0);
		}
		#elif 0
		{
			Debug = float4(1, 0, 0, 0);
		}
		#endif

		StoreAccumulatorOutput(OutputPixelPosition, AccumulatorOutput);

		#if DEBUG_OUTPUT
		{
			DebugOutput[OutputPixelPosition] = Debug;
		}
		#endif
	}
}