Files
2025-05-18 13:04:45 +08:00

741 lines
23 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
DiaphragmDOF/DOFReduce.usf: Diaphragm DOF's reduce pass.
=============================================================================*/
#define EYE_ADAPTATION_LOOSE_PARAMETERS 1
#include "DOFDownsample.ush"
#include "DOFHybridScatterCompilation.ush"
#include "DOFVignetteCommon.ush"
#include "../ReductionCommon.ush"
//------------------------------------------------------- ENUM VALUES
/** Method used to allocate scatter group globally. One of these values is selected as SCATTER_ALLOC_METHOD. */
// Threads InterlockedOr() their scattering group bit into a single groupshared mask.
#define SCATTER_ALLOC_METHOD_ATOMIC 0
// Merges the per-thread scattering group bits with the WaveActiveBitOr() wave instruction.
#define SCATTER_ALLOC_METHOD_WAVE 1
// Each thread stores its bit in its own LDS entry; the entries are then OR-ed together in a loop.
#define SCATTER_ALLOC_METHOD_LDS 2
//------------------------------------------------------- COMPILE TIME CONSTANTS
// Number of 2x2 reduction steps performed within one thread group tile.
#define SHARED_REDUCE_COUNT 3
// Mip 0 plus one mip per reduction step.
#define OUTPUT_MIP_COUNT (SHARED_REDUCE_COUNT + 1)
// Width and height of the thread group tile (8 when SHARED_REDUCE_COUNT is 3).
#define THREADGROUP_TILE_SIZE (1 << SHARED_REDUCE_COUNT)
// Number of threads in a group (64 when SHARED_REDUCE_COUNT is 3).
#define THREADGROUP_TOTALSIZE (THREADGROUP_TILE_SIZE * THREADGROUP_TILE_SIZE)
// Selects how color, alpha and CoC radius are laid out in the gather input and in the output mips.
#if CONFIG_DOF_ALPHA
#define CONFIG_GATHER_INPUT_LAYOUT (GATHER_INPUT_LAYOUT_RGB_ALPHA_COC)
#elif DIM_RGB_COLOR_BUFFER
#define CONFIG_GATHER_INPUT_LAYOUT (GATHER_INPUT_LAYOUT_RGB_SEPARATE_COC)
#else
#define CONFIG_GATHER_INPUT_LAYOUT (GATHER_INPUT_LAYOUT_RGB_COC)
#endif
// Build the mips with wave broadcast operations instead of LDS when the platform supports them.
#define CONFIG_WAVE_BROADCAST_REDUCTION (PLATFORM_SUPPORTS_WAVE_BROADCAST)
// Selects how the per-thread scattering group bits are merged across the thread group (see SCATTER_ALLOC_METHOD_*).
#if COMPILER_SUPPORTS_WAVE_BIT_ORAND && (COMPILER_PSSL || XBOXONE_PROFILE)
// GCN only optimisation
#define SCATTER_ALLOC_METHOD (SCATTER_ALLOC_METHOD_WAVE)
#elif COMPILER_HLSLCC
// Compiler does not like InterlockedOr().
#define SCATTER_ALLOC_METHOD (SCATTER_ALLOC_METHOD_LDS)
#else
#define SCATTER_ALLOC_METHOD (SCATTER_ALLOC_METHOD_ATOMIC)
#endif
//------------------------------------------------------- PARAMETERS
// Viewport rectangle; .zw is used as the far border when measuring a pixel's distance to the viewport edge.
uint4 ViewportRect;
// .zw scales a scattering screen position before it is remapped to [-1, 1] clip space
// (presumably the inverse viewport size - confirm against the C++ side).
float4 ScatteringViewportSize;
// Scale applied to this pass' pixel position to get the scattering screen position.
float ScatteringScaling;
// Capacity, in scattering groups, of each scatter draw list; groups allocated past it are not written.
uint MaxScatteringGroupCount;
// Upper clamp applied to the UV used to sample the gather input.
float2 MaxInputBufferUV;
// CoC radius subtracted from the (processing basis) CoC radius when fading out the scatter factor of small samples.
float MinScatteringCocRadius;
// Converts a CoC radius from the preprocessing basis to the processing basis.
float PreProcessingToProcessingCocRadiusFactor;
// Threshold of ApplyBokehGatherDistinctionFilter(); 0 disables the filter.
float BokehGatherDistinctionLimit;
// Lens and barrel description forwarded to the vignette projection helpers in ComputeVignetteData().
float2 SensorSize;
float Aperture;
float LensImageDistance;
float BarrelRadius;
// A negative value skips writing the vignette data to the scatter draw lists.
float BarrelLength;
// Matte box flags, each forwarded to ProjectAndPackMatteBoxFlag().
uint4 MatteBoxPlanes[MAX_MATTE_BOX_FLAGS];
// .zw converts a pixel center of this pass to a UV of the gather input.
float4 GatherInputSize;
// Gather input color; its alpha carries the CoC radius unless CONFIG_DOF_ALPHA is set.
Texture2D GatherInput_SceneColor;
// CoC radius (red channel) of the gather input, sampled only when CONFIG_DOF_ALPHA is set.
Texture2D GatherInput_SeparateCoc;
// .zw is passed as the inverse size of the neighborhood comparison buffer.
float4 QuarterResGatherInputSize;
// Buffer the hybrid scatter neighborhood comparison reads from.
Texture2D QuarterResGatherInput_SceneColor;
//------------------------------------------------------- OUTPUTS
// One color target per output mip; OutputMipLevel() selects the one matching its MipLevel.
RWTexture2D<float4> OutputMips_0_SceneColor;
RWTexture2D<float4> OutputMips_1_SceneColor;
RWTexture2D<float4> OutputMips_2_SceneColor;
RWTexture2D<float4> OutputMips_3_SceneColor;
// Layouts that do not store the CoC radius in the color's alpha output it to these separate targets.
#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC || CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_SEPARATE_COC
RWTexture2D<float> OutputMips_0_SeparateCoc;
RWTexture2D<float> OutputMips_1_SeparateCoc;
RWTexture2D<float> OutputMips_2_SeparateCoc;
RWTexture2D<float> OutputMips_3_SeparateCoc;
#endif
#if DIM_HYBRID_SCATTER_FGD || DIM_HYBRID_SCATTER_BGD
// Per-layer counters atomically incremented by the number of scattering groups allocated
// (foreground at parameter set 0, background at parameter set 1).
RWBuffer<uint> OutScatterDrawIndirectParameters;
// Per scattering group: the group position, followed by the per-pixel color, CoC radius and optional vignette data.
RWStructuredBuffer<float4> OutForegroundScatterDrawList;
RWStructuredBuffer<float4> OutBackgroundScatterDrawList;
#endif
//------------------------------------------------------- LDS
// Storage used to merge the scattering group bits of all threads; its shape depends on the allocation method
// (the wave method needs none).
#if SCATTER_ALLOC_METHOD == SCATTER_ALLOC_METHOD_ATOMIC
groupshared uint SharedForegroundScatterGroupMask;
groupshared uint SharedBackgroundScatterGroupMask;
#elif SCATTER_ALLOC_METHOD == SCATTER_ALLOC_METHOD_LDS
groupshared uint SharedForegroundScatterGroupMask[THREADGROUP_TOTALSIZE];
groupshared uint SharedBackgroundScatterGroupMask[THREADGROUP_TOTALSIZE];
#endif
// Receive the counter value returned by the global InterlockedAdd(), i.e. the first draw list slot
// allocated to this thread group for each layer.
groupshared uint SharedForegroundAtomic;
groupshared uint SharedBackgroundAtomic;
// Samples being reduced through LDS. Alpha holds the CoC radius, except with the RGB_ALPHA_COC layout
// where the CoC radius lives in GroupSharedArray2.
groupshared float4 GroupSharedArray[THREADGROUP_TOTALSIZE];
#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC
groupshared float GroupSharedArray2[THREADGROUP_TOTALSIZE];
#endif
//------------------------------------------------------- FUNCTIONS
// Reduces four samples stored in GroupSharedArray (and GroupSharedArray2 for the RGB_ALPHA_COC layout)
// into a single sample, written back in place.
//   DownsampleParams  Forwarded untouched to DownsampleSceneColorWithCoc().
//   OutId             Slot receiving the result; it is also the first of the four slots read.
//   ReduceBankSize    Distance between two consecutive input slots.
void ReduceOperator(FCocDownsampleParams DownsampleParams, uint OutId, uint ReduceBankSize)
{
	// Gather the 4 input samples. The samples are spread over banks of size <ReduceBankSize>
	// for coherent LDS memory access.
	float4 InColors[4];
	float InCocRadii[4];

	UNROLL
	for (uint SampleIndex = 0; SampleIndex < 4; SampleIndex++)
	{
		const uint SourceSlot = OutId + SampleIndex * ReduceBankSize;
		const float4 StoredSample = GroupSharedArray[SourceSlot];

		InColors[SampleIndex] = StoredSample;
		#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC
			// Alpha is real opacity in this layout, the CoC radius has its own array.
			InCocRadii[SampleIndex] = GroupSharedArray2[SourceSlot];
		#else
			// The CoC radius is packed in the alpha channel.
			InCocRadii[SampleIndex] = StoredSample.a;
		#endif
	}

	// Downsample the 4 samples to one according to their CoC.
	float4 ReducedColor;
	float ReducedCocRadius;
	DownsampleSceneColorWithCoc(DownsampleParams, InColors, InCocRadii, ReducedColor, ReducedCocRadius);

	// No GroupMemoryBarrierWithGroupSync() is issued between the reads above and the write below.
	// Original author's note: technically a barrier is needed, but in practice with a warp size >= 32 it is not.

	// Output to shared memory, using the same packing as the input.
	#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC
		GroupSharedArray[OutId] = ReducedColor;
		GroupSharedArray2[OutId] = ReducedCocRadius;
	#else
		GroupSharedArray[OutId] = float4(ReducedColor.rgb, ReducedCocRadius);
	#endif
}
// Writes one sample to the output mip <MipLevel> at <MipPixelPos>, packed according to CONFIG_GATHER_INPUT_LAYOUT.
//   MipLevel     Mip to write. Levels that are not compiled in (>= max(DIM_REDUCE_MIP_COUNT, 1)) are silently ignored.
//   MipPixelPos  Pixel coordinate within that mip.
//   Color        Sample color; alpha is only output as is with the RGB_ALPHA_COC layout.
//   CocRadius    Sample CoC radius; goes to alpha (RGB_COC) or to the separate CoC target (other layouts).
void OutputMipLevel(const uint MipLevel, uint2 MipPixelPos, float4 Color, float CocRadius)
{
#if CONFIG_GATHER_INPUT_LAYOUT != GATHER_INPUT_LAYOUT_RGB_COC && CONFIG_GATHER_INPUT_LAYOUT != GATHER_INPUT_LAYOUT_RGB_ALPHA_COC && CONFIG_GATHER_INPUT_LAYOUT != GATHER_INPUT_LAYOUT_RGB_SEPARATE_COC
	#error Unknown gather input layout.
#endif

	// Pack the value going to the color target.
#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_COC
	const float4 PackedSceneColor = float4(Color.rgb, CocRadius);
#elif CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_SEPARATE_COC
	const float4 PackedSceneColor = float4(Color.rgb, 0);
#else
	const float4 PackedSceneColor = Color;
#endif

	// The targets are distinct resources, so they are selected with a branch chain. MipLevel is expected
	// to be known at compile time, which leaves a single store once the chain is resolved.
	// Every layout but RGB_COC also outputs the CoC radius to its separate target.
	if (MipLevel == 0)
	{
		OutputMips_0_SceneColor[MipPixelPos] = PackedSceneColor;
		#if CONFIG_GATHER_INPUT_LAYOUT != GATHER_INPUT_LAYOUT_RGB_COC
			OutputMips_0_SeparateCoc[MipPixelPos] = CocRadius;
		#endif
	}
#if DIM_REDUCE_MIP_COUNT > 1
	else if (MipLevel == 1)
	{
		OutputMips_1_SceneColor[MipPixelPos] = PackedSceneColor;
		#if CONFIG_GATHER_INPUT_LAYOUT != GATHER_INPUT_LAYOUT_RGB_COC
			OutputMips_1_SeparateCoc[MipPixelPos] = CocRadius;
		#endif
	}
#endif
#if DIM_REDUCE_MIP_COUNT > 2
	else if (MipLevel == 2)
	{
		OutputMips_2_SceneColor[MipPixelPos] = PackedSceneColor;
		#if CONFIG_GATHER_INPUT_LAYOUT != GATHER_INPUT_LAYOUT_RGB_COC
			OutputMips_2_SeparateCoc[MipPixelPos] = CocRadius;
		#endif
	}
#endif
#if DIM_REDUCE_MIP_COUNT > 3
	else if (MipLevel == 3)
	{
		OutputMips_3_SceneColor[MipPixelPos] = PackedSceneColor;
		#if CONFIG_GATHER_INPUT_LAYOUT != GATHER_INPUT_LAYOUT_RGB_COC
			OutputMips_3_SeparateCoc[MipPixelPos] = CocRadius;
		#endif
	}
#endif
}
//------------------------------------------------------- ENTRY POINT
// Pixels that are bright and have a large CoC should be scattered instead of gathered, to avoid gather noise.
// If they aren't, e.g. due to performance reasons, we may prefer to cull them instead of gathering them anyway.
// This will bias the image, as bokeh may appear slightly darker, but helps clean up noise in large bokeh.
//
//   CocRadius           CoC radius of the sample; only its magnitude is used.
//   Color               Sample color; only rgb is used.
//   FrameExposureScale  Scale applied to the luminance before it is compared against the limit.
// Returns the weight to multiply the gathered color with: 1 keeps the sample, 0 culls it.
// Always returns 1 when BokehGatherDistinctionLimit is 0 (filter disabled).
float ApplyBokehGatherDistinctionFilter(float CocRadius, float4 Color, float FrameExposureScale)
{
	if (BokehGatherDistinctionLimit == 0)
	{
		return 1;
	}

	// Half width of the fade centered on the limit, in the same unit as BokehDistinction.
	const float FadeHalfWidth = 200;

	// see also ComputeLuminanceOnlyBasedScatterFactor
	const float PerceivedLuma = Luma4(Color.rgb) * FrameExposureScale;
	const float BokehDistinction = abs(CocRadius) * PerceivedLuma;

	// Fade from 1 (at limit - half width) down to 0 (at limit + half width). smoothstep() is called with
	// ascending edges and inverted afterwards: calling it with edge0 > edge1 gives the same curve with the
	// usual implementation, but is undefined in GLSL / SPIR-V / Metal which this shader can be translated to.
	const float CullAmount = smoothstep(
		BokehGatherDistinctionLimit - FadeHalfWidth,
		BokehGatherDistinctionLimit + FadeHalfWidth,
		BokehDistinction);
	return 1 - CullAmount;
}
// Builds the vignette data stored with a scattered pixel: the projected lens barrel followed by the
// projected matte box flags, each packed by its helper and bit-cast to floats.
//   DispatchThreadId  Pixel position in this pass; snapped to the even pixel of its 2x2 quad.
//   CocRadius         CoC radius forwarded to the barrel projection.
//   Output            Receives the packed data.
// NOTE(review): only Output[0] and Output[1] are written, from exactly three matte box flags. This assumes
// VIGNETTE_DATA_PER_PIXEL == 2 and MAX_MATTE_BOX_FLAGS >= 3, both defined outside this file - confirm.
void ComputeVignetteData(uint2 DispatchThreadId, float CocRadius, out float4 Output[VIGNETTE_DATA_PER_PIXEL])
{
	// Clearing the lowest bit makes the four pixels of a 2x2 quad share the same position.
	const uint2 QuadOrigin = DispatchThreadId & 0xFFFFFFFE;
	const float2 QuadDrawPosition = QuadOrigin + 0.5;

	// Position of the bokeh center, first in scattering screen space and then remapped to [-1, 1].
	const float2 CenterScreenPos = ScatteringScaling * QuadDrawPosition;
	const float2 CenterClipPos = (CenterScreenPos * ScatteringViewportSize.zw) * 2.0 - 1.0;

	// Direction built from the position on the sensor and the lens image distance.
	const float2 CenterSensorPos = CenterClipPos * (SensorSize*0.5);
	const float3 ObjectDirection = normalize(-float3(CenterSensorPos, -LensImageDistance));

	const float FocusDistance = View.DepthOfFieldFocalDistance;
	const float2 PackedBarrel = asfloat(ProjectAndPackCylinder(
		CocRadius,
		CenterClipPos,
		BarrelRadius,
		BarrelLength,
		SensorSize,
		FocusDistance,
		Aperture,
		CocInfinityRadius,
		LensImageDistance));

	float2 PackedMatteBoxFlags[MAX_MATTE_BOX_FLAGS];

	UNROLL
	for (int FlagIndex = 0; FlagIndex < MAX_MATTE_BOX_FLAGS; ++FlagIndex)
	{
		PackedMatteBoxFlags[FlagIndex] = asfloat(ProjectAndPackMatteBoxFlag(ObjectDirection, BarrelRadius, BarrelLength, MatteBoxPlanes[FlagIndex]));
	}

	// Two float2 values per float4: barrel + flag 0, then flag 1 + flag 2.
	Output[0] = float4(PackedBarrel, PackedMatteBoxFlags[0]);
	Output[1] = float4(PackedMatteBoxFlags[1], PackedMatteBoxFlags[2]);
}
// Reduce pass entry point. Each thread group covers one THREADGROUP_TILE_SIZE x THREADGROUP_TILE_SIZE tile,
// one texel per thread, and:
//  1. fetches the color and CoC radius of its texel from the gather input,
//  2. when hybrid scattering is compiled in (DIM_HYBRID_SCATTER_FGD / DIM_HYBRID_SCATTER_BGD), decides which
//     groups of four threads ("scattering groups") should be scattered, appends them to the foreground and
//     background scatter draw lists and removes the scattered intensity from the gathered color,
//  3. writes mip 0 (unless DIM_DISABLE_OUTPUT_MIP0) and then the coarser mips, DIM_REDUCE_MIP_COUNT in total,
//     reducing either with wave broadcast operations or through LDS.
//   GroupId           Thread group coordinate, i.e. tile coordinate.
//   GroupThreadIndex  Flattened index of the thread within its group.
[numthreads(THREADGROUP_TILE_SIZE, THREADGROUP_TILE_SIZE, 1)]
void ReduceCS(
uint2 GroupId : SV_GroupID,
uint GroupThreadIndex : SV_GroupIndex)
{
// Init LDS.
// Only the atomic method accumulates into the shared masks, so only it needs them cleared first.
#if (DIM_HYBRID_SCATTER_FGD || DIM_HYBRID_SCATTER_BGD) && SCATTER_ALLOC_METHOD == SCATTER_ALLOC_METHOD_ATOMIC
{
SharedForegroundScatterGroupMask = 0;
SharedBackgroundScatterGroupMask = 0;
GroupMemoryBarrierWithGroupSync();
}
#endif
// Gets group thread id to be fully LDS coherent.
// Also splits the group into 16 scattering groups of 4 threads: ScatteringGroupIndex selects the group,
// ScatteringGroupThreadIndex the thread within it.
#if CONFIG_WAVE_BROADCAST_REDUCTION
// Row major layout: bits 0-2 of the index are x, bits 3-5 are y.
uint2 GroupThreadId = uint2(GroupThreadIndex % THREADGROUP_TILE_SIZE, GroupThreadIndex / THREADGROUP_TILE_SIZE);
// Upper two bits of x and of y: index of the 2x2 quad within the tile.
uint ScatteringGroupIndex = (
((GroupThreadIndex & 0x06) >> 1) |
((GroupThreadIndex & 0x30) >> 2));
// Lowest bit of x and of y: index of the pixel within its 2x2 quad.
uint ScatteringGroupThreadIndex = (
( GroupThreadIndex & 0x1) |
((GroupThreadIndex & 0x8) >> 2));
#else
// Layout provided by the reduction helper: the four threads of a scattering group are 16 thread indices apart.
uint2 GroupThreadId = InitialTilePixelPositionForReduction2x2(SHARED_REDUCE_COUNT, GroupThreadIndex);
uint ScatteringGroupIndex = GroupThreadIndex & 0xF;
uint ScatteringGroupThreadIndex = GroupThreadIndex >> 4;
#endif
uint2 DispatchThreadId = THREADGROUP_TILE_SIZE * GroupId + GroupThreadId;
float2 BufferUV = (DispatchThreadId + 0.5) * GatherInputSize.zw;
// Keep the UV of threads past the last valid texel within the valid part of the input.
if (true)
{
BufferUV = min(BufferUV, MaxInputBufferUV);
}
// Fetch scene color.
float4 GatheredColor = 0;
float CocRadius = 0;
#if CONFIG_DOF_ALPHA
{
GatheredColor = GatherInput_SceneColor.SampleLevel(GlobalPointClampedSampler, BufferUV, 0);
CocRadius = GatherInput_SeparateCoc.SampleLevel(GlobalPointClampedSampler, BufferUV, 0).r;
// Convert from translucency to opacity.
// Only done when this pass outputs mip 0 (presumably the input is already converted otherwise - confirm).
#if !DIM_DISABLE_OUTPUT_MIP0
GatheredColor.a = 1.0 - GatheredColor.a;
#endif
}
#else
{
// The CoC radius is stored in the alpha channel of the color.
GatheredColor = GatherInput_SceneColor.SampleLevel(GlobalPointClampedSampler, BufferUV, 0);
CocRadius = GatheredColor.a;
}
#endif
// Sample the frame exposure to SGPR.
float FrameExposureScale = ToScalarMemory(EyeAdaptationLookup() * View.OneOverPreExposure);
// Indirect dispatch
#if DIM_HYBRID_SCATTER_FGD || DIM_HYBRID_SCATTER_BGD
{
const uint IndirectParameterSize = SCATTERING_INDIRECT_PARAMETER_SIZE;
// Convert Coc radius from preprocessing basis to processing basis.
float ScatterCocRadius = CocRadius * PreProcessingToProcessingCocRadiusFactor;
// Decides whether should hybrid scatter or not.
// It is fine to do it for sample outside viewport UV because they are discarded when the bokeh
// is getting close to viewport edge.
float ScatterFactor;
{
FHybridScatterInputs Parameters;
Parameters.NeighborhoodComparisonBuffer = QuarterResGatherInput_SceneColor;
Parameters.NeighborhoodComparisonBufferInvSize = QuarterResGatherInputSize.zw;
Parameters.NeighborhoodComparisonMaxBufferUV = MaxInputBufferUV;
Parameters.FrameExposureScale = FrameExposureScale;
Parameters.Color = GatheredColor;
Parameters.CocRadius = ScatterCocRadius;
Parameters.BufferUV = BufferUV;
ScatterFactor = ComputeLuminanceOnlyBasedScatterFactor(Parameters);
// Do not scatter if the sample becomes really small.
// When only one layer is scattered, the signed radius also rejects samples of the other layer.
if (1)
{
#if DIM_HYBRID_SCATTER_FGD && !DIM_HYBRID_SCATTER_BGD
ScatterFactor *= saturate(-ScatterCocRadius - MinScatteringCocRadius);
#elif !DIM_HYBRID_SCATTER_FGD && DIM_HYBRID_SCATTER_BGD
ScatterFactor *= saturate(ScatterCocRadius - MinScatteringCocRadius);
#else
ScatterFactor *= saturate(abs(ScatterCocRadius) - MinScatteringCocRadius);
#endif
}
// Do not scatter if the Coc starts to draw outside the viewport.
if (1)
{
float2 SvPosition = (DispatchThreadId + 0.5);
float ClosestBorderDistance = float(min(
min(SvPosition.x, SvPosition.y),
min(ViewportRect.z - SvPosition.x, ViewportRect.w - SvPosition.y)));
ScatterFactor *= saturate(ClosestBorderDistance - abs(ScatterCocRadius));
}
// Compare this sample with neighbor if worth it.
BRANCH
if (ScatterFactor > 0.5)
{
ScatterFactor *= ComputeNeighborBasedScatterFactor(Parameters);
}
// Sharpen to remove sprites that are contributing not that much.
// With Sharpen = 2 this remaps [0.5, 1] to [0, 1] and zeroes everything below 0.5.
if (1)
{
const float Sharpen = 2;
ScatterFactor = saturate(ScatterFactor * Sharpen + (1 - Sharpen));
}
}
// One bit per scattering group.
uint ScatteringGroupMask = 1u << ScatteringGroupIndex;
const float ScatterFactorThreshold = 0.01;
bool bScatterThisColor = ScatterFactor > ScatterFactorThreshold;
bool bIsForeground = IsForeground(CocRadius);
// Samples that do not request scattering are attenuated by the bokeh gather distinction filter instead.
// NOTE(review): View.OneOverPreExposure is passed as the exposure scale rather than the FrameExposureScale
// computed above (which also includes EyeAdaptationLookup()) - confirm this is intended.
if (!bScatterThisColor)
{
GatheredColor *= ApplyBokehGatherDistinctionFilter(CocRadius, GatheredColor, View.OneOverPreExposure);
}
// Whether scattering group actually scatter or not.
// A group is scattered in a layer as soon as one of its threads wants to scatter in that layer;
// the masks below are the OR of the requests of every thread.
uint ForegroundScatterGroupMask = 0;
uint BackgroundScatterGroupMask = 0;
#if SCATTER_ALLOC_METHOD == SCATTER_ALLOC_METHOD_WAVE
{
#if DIM_HYBRID_SCATTER_FGD
ForegroundScatterGroupMask = WaveActiveBitOr((bScatterThisColor && bIsForeground) ? ScatteringGroupMask : 0);
#endif
#if DIM_HYBRID_SCATTER_BGD
BackgroundScatterGroupMask = WaveActiveBitOr((bScatterThisColor && !bIsForeground) ? ScatteringGroupMask : 0);
#endif
}
#elif SCATTER_ALLOC_METHOD == SCATTER_ALLOC_METHOD_ATOMIC
// Atomic or whether scattering group actually scatter or not.
{
BRANCH
if (DIM_HYBRID_SCATTER_FGD && bIsForeground && bScatterThisColor)
{
uint Unused;
InterlockedOr(SharedForegroundScatterGroupMask, ScatteringGroupMask, Unused);
}
BRANCH
if (DIM_HYBRID_SCATTER_BGD && !bIsForeground && bScatterThisColor)
{
uint Unused;
InterlockedOr(SharedBackgroundScatterGroupMask, ScatteringGroupMask, Unused);
}
// All atomics must have landed before the merged masks are read back.
GroupMemoryBarrierWithGroupSync();
#if DIM_HYBRID_SCATTER_FGD
ForegroundScatterGroupMask = SharedForegroundScatterGroupMask;
#endif
#if DIM_HYBRID_SCATTER_BGD
BackgroundScatterGroupMask = SharedBackgroundScatterGroupMask;
#endif
}
#elif SCATTER_ALLOC_METHOD == SCATTER_ALLOC_METHOD_LDS
{
// Each thread publishes its own request, then every thread ORs all the entries together.
#if DIM_HYBRID_SCATTER_FGD
SharedForegroundScatterGroupMask[GroupThreadIndex] = (
(bScatterThisColor && bIsForeground) ? ScatteringGroupMask : 0);
#endif
#if DIM_HYBRID_SCATTER_BGD
SharedBackgroundScatterGroupMask[GroupThreadIndex] = (
(bScatterThisColor && !bIsForeground) ? ScatteringGroupMask : 0);
#endif
GroupMemoryBarrierWithGroupSync();
UNROLL
for (uint i = 0; i < THREADGROUP_TOTALSIZE; i++)
{
#if DIM_HYBRID_SCATTER_FGD
ForegroundScatterGroupMask |= SharedForegroundScatterGroupMask[i];
#endif
#if DIM_HYBRID_SCATTER_BGD
BackgroundScatterGroupMask |= SharedBackgroundScatterGroupMask[i];
#endif
}
}
#else
#error Unknown scatter group allocation method.
#endif // SCATTER_ALLOC_METHOD
// Allocate globally.
// A single thread reserves one draw list slot per scattering group set in the mask; the counter value
// before the add, i.e. the first reserved slot, is returned into LDS for the whole group.
BRANCH
if (GroupThreadIndex == 0)
{
BRANCH
if (DIM_HYBRID_SCATTER_FGD && ForegroundScatterGroupMask)
{
uint ScatteringGroupCount = countbits(ForegroundScatterGroupMask);
InterlockedAdd(OutScatterDrawIndirectParameters[1 + 0 * IndirectParameterSize], ScatteringGroupCount, SharedForegroundAtomic);
}
BRANCH
if (DIM_HYBRID_SCATTER_BGD && BackgroundScatterGroupMask)
{
uint ScatteringGroupCount = countbits(BackgroundScatterGroupMask);
InterlockedAdd(OutScatterDrawIndirectParameters[1 + 1 * IndirectParameterSize], ScatteringGroupCount, SharedBackgroundAtomic);
}
}
// Make the reserved slots visible to every thread. The barrier is skipped when a single wave
// holds the entire thread group.
#if COMPILER_SUPPORTS_WAVE_ONCE
if (WaveGetLaneCount() < THREADGROUP_TILE_SIZE * THREADGROUP_TILE_SIZE)
{
GroupMemoryBarrierWithGroupSync();
}
#else
GroupMemoryBarrierWithGroupSync();
#endif
// Intensity loss factor caused by the area of confusion.
// TODO: no need min()
float IntensityLossFactor = min(1, SafeRcp(PI * CocRadius * CocRadius, 1.0) * CocSqueeze);
// Enqueue scattering group in foreground scatter draw list.
BRANCH
if (DIM_HYBRID_SCATTER_FGD && (ScatteringGroupMask & ForegroundScatterGroupMask))
{
// Rank of this group among the scattered groups: number of set bits below its own bit.
uint GroupScatteringOffset = countbits(ForegroundScatterGroupMask & (ScatteringGroupMask - 1));
uint IndirectDrawIndex = SharedForegroundAtomic + GroupScatteringOffset;
BRANCH
if (IndirectDrawIndex < MaxScatteringGroupCount)
{
// First entry of the group: its position, written by one thread only.
BRANCH
if (ScatteringGroupThreadIndex == 0)
{
OutForegroundScatterDrawList[SCATTER_DRAW_LIST_GROUP_STRIDE * IndirectDrawIndex + 0] = float4(DispatchThreadId + 0.5, 0.0, 0.0);
}
// Every thread of the group writes its own entry; background samples contribute a black color.
float4 ScatterColor = GatheredColor * (ScatterFactor * IntensityLossFactor * (IsForeground(ScatterCocRadius) ? 1 : 0));
uint Offset = SCATTER_DRAW_LIST_GROUP_STRIDE * IndirectDrawIndex + 1 + (ScatteringGroupThreadIndex * SCATTER_DRAW_LIST_DATA_PER_PIXEL);
OutForegroundScatterDrawList[Offset] = float4(ScatterColor.rgb, abs(ScatterCocRadius));
// A negative barrel length skips the vignette data.
if (BarrelLength >= 0)
{
float4 VignetteData[VIGNETTE_DATA_PER_PIXEL];
ComputeVignetteData(DispatchThreadId, ScatterCocRadius, VignetteData);
UNROLL
for (uint Index = 0; Index < VIGNETTE_DATA_PER_PIXEL; ++Index)
{
OutForegroundScatterDrawList[Offset + 1 + Index] = VignetteData[Index];
}
}
}
else
{
// The draw list is full, so this scattering group is not scattered: force ScatterFactor = 0 to keep
// the whole intensity of the sample in the gathered color.
ScatterFactor = 0;
}
}
// Enqueue scattering group in background scatter draw list.
BRANCH
if (DIM_HYBRID_SCATTER_BGD && (ScatteringGroupMask & BackgroundScatterGroupMask))
{
// Rank of this group among the scattered groups: number of set bits below its own bit.
uint GroupScatteringOffset = countbits(BackgroundScatterGroupMask & (ScatteringGroupMask - 1));
uint IndirectDrawIndex = SharedBackgroundAtomic + GroupScatteringOffset;
BRANCH
if (IndirectDrawIndex < MaxScatteringGroupCount)
{
// First entry of the group: its position, written by one thread only.
BRANCH
if (ScatteringGroupThreadIndex == 0)
{
OutBackgroundScatterDrawList[SCATTER_DRAW_LIST_GROUP_STRIDE * IndirectDrawIndex + 0] = float4(DispatchThreadId + 0.5, 0.0, 0.0);
}
// Every thread of the group writes its own entry; foreground samples contribute a black color.
float4 ScatterColor = GatheredColor * (ScatterFactor * IntensityLossFactor * (IsForeground(ScatterCocRadius) ? 0 : 1));
uint Offset = SCATTER_DRAW_LIST_GROUP_STRIDE * IndirectDrawIndex + 1 + (ScatteringGroupThreadIndex * SCATTER_DRAW_LIST_DATA_PER_PIXEL);
OutBackgroundScatterDrawList[Offset] = float4(ScatterColor.rgb, abs(ScatterCocRadius));
// A negative barrel length skips the vignette data.
if (BarrelLength >= 0)
{
float4 VignetteData[VIGNETTE_DATA_PER_PIXEL];
ComputeVignetteData(DispatchThreadId, ScatterCocRadius, VignetteData);
UNROLL
for (uint Index = 0; Index < VIGNETTE_DATA_PER_PIXEL; ++Index)
{
OutBackgroundScatterDrawList[Offset + 1 + Index] = VignetteData[Index];
}
}
}
else
{
// The draw list is full, so this scattering group is not scattered: force ScatterFactor = 0 to keep
// the whole intensity of the sample in the gathered color.
ScatterFactor = 0;
}
}
// Remove color intensity that has been scattered.
GatheredColor.rgb *= (1 - ScatterFactor);
}
#endif // DIM_HYBRID_SCATTER_FGD || DIM_HYBRID_SCATTER_BGD
// Every thread outputs at its own pixel, without any viewport test. The original note here reads
// "for gathering pass to avoid having to clamp UV in gathering pass".
uint2 OutputPixelId = DispatchThreadId;
// Output mip 0 in output buffer that is always a multiple of group size. Unless reducing lower mips.
#if !DIM_DISABLE_OUTPUT_MIP0
{
OutputMipLevel(/* MipLevel = */ 0, OutputPixelId, GatheredColor, CocRadius);
}
#endif
FCocDownsampleParams DownsampleParams;
DownsampleParams.CocRadiusMultiplier = 1.0;
DownsampleParams.FrameExposureScale = FrameExposureScale;
DownsampleParams.bDoColorBasedWeighting = false;
#if CONFIG_WAVE_BROADCAST_REDUCTION
{
// Each iteration halves the resolution using cross lane operations and outputs mip i + 1.
UNROLL
for (uint i = 0; i < (DIM_REDUCE_MIP_COUNT - 1); i++)
{
// Multiplier that needs to be applied on Coc computation to be resolution independent.
DownsampleParams.CocRadiusMultiplier = 0.5 / float(1u << (i));
// Scale of the reduction within the 8x8 tile.
const uint ReductionScale = 1u << i;
// Chooses output CoC across lanes.
float OutputCocRadius = DownsampleCoc_Wave8x8(CocRadius, ReductionScale);
// Compute the sample's bilateral weight.
float SampleBilateralWeight = ComputeDownsamplingBilateralWeight(DownsampleParams, OutputCocRadius, CocRadius, GatheredColor.rgb);
// Weight sum the color across the lanes.
GatheredColor.r = Sum2x2WithinWave8x8(GatheredColor.r * SampleBilateralWeight, ReductionScale);
GatheredColor.g = Sum2x2WithinWave8x8(GatheredColor.g * SampleBilateralWeight, ReductionScale);
GatheredColor.b = Sum2x2WithinWave8x8(GatheredColor.b * SampleBilateralWeight, ReductionScale);
GatheredColor.a = Sum2x2WithinWave8x8(GatheredColor.a * SampleBilateralWeight, ReductionScale);
// Compute normalising weight across lanes.
float InvTotalWeight = rcp(Sum2x2WithinWave8x8(SampleBilateralWeight, ReductionScale));
// Normalize the color.
GatheredColor *= InvTotalWeight;
CocRadius = OutputCocRadius;
// Output mip map.
// Only the thread whose pixel coordinates are both multiples of the new mip's footprint writes it.
OutputPixelId = OutputPixelId >> 1;
BRANCH
if (((DispatchThreadId.x | DispatchThreadId.y) & ((2u << i) - 1)) == 0)
{
OutputMipLevel(/* MipLevel = */ i + 1, OutputPixelId, GatheredColor, CocRadius);
}
}
}
#else // !CONFIG_WAVE_BROADCAST_REDUCTION
{
// Store color to LDS for reduction.
#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC
{
GroupSharedArray[GroupThreadIndex] = GatheredColor;
GroupSharedArray2[GroupThreadIndex] = CocRadius;
}
#else
{
GroupSharedArray[GroupThreadIndex] = float4(GatheredColor.rgb, CocRadius);
}
#endif
// Output mip > 0 of the gathering pass.
UNROLL
for (uint i = 0; i < (DIM_REDUCE_MIP_COUNT - 1); i++)
{
// Size of the tile once reduced, which is also the number of threads still working at this step.
const uint TileSize = THREADGROUP_TILE_SIZE / (1u << (i + 1));
const uint ReduceBankSize = TileSize * TileSize;
// GroupSharedArray has been written before.
GroupMemoryBarrierWithGroupSync();
if (GroupThreadIndex < ReduceBankSize)
{
// Multiplier that needs to be applied on Coc computation to be resolution independent.
DownsampleParams.CocRadiusMultiplier = 0.5 / float(1u << (i));
// Reduce.
ReduceOperator(DownsampleParams, GroupThreadIndex, ReduceBankSize);
// Read back from LDS (compilers are generally smart enough here to actually keep the value in VGPR).
#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC
float4 Color = GroupSharedArray[GroupThreadIndex];
float OutCocRadius = GroupSharedArray2[GroupThreadIndex];
#else
float4 Color = GroupSharedArray[GroupThreadIndex];
float OutCocRadius = GroupSharedArray[GroupThreadIndex].a;
#endif
// Output mip map.
OutputPixelId = OutputPixelId >> 1;
OutputMipLevel(/* MipLevel = */ i + 1, OutputPixelId, Color, OutCocRadius);
}
}
}
#endif // !CONFIG_WAVE_BROADCAST_REDUCTION
}