741 lines
23 KiB
HLSL
741 lines
23 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
DiaphragmDOF/DOFReduce.usf: Diaphragm DOF's reduce pass.
|
|
=============================================================================*/
|
|
|
|
#define EYE_ADAPTATION_LOOSE_PARAMETERS 1
|
|
|
|
#include "DOFDownsample.ush"
|
|
#include "DOFHybridScatterCompilation.ush"
|
|
#include "DOFVignetteCommon.ush"
|
|
#include "../ReductionCommon.ush"
|
|
|
|
|
|
//------------------------------------------------------- ENUM VALUES

/** Method used to allocate scatter group globally. */
	// Uses bit or atomic
	#define SCATTER_ALLOC_METHOD_ATOMIC 0

	// Uses wave bit or instruction.
	#define SCATTER_ALLOC_METHOD_WAVE 1

	// Uses LDS entry for each thread.
	#define SCATTER_ALLOC_METHOD_LDS 2


//------------------------------------------------------- COMPILE TIME CONSTANTS

// Number of 2x2 reductions that can be done within one thread group.
#define SHARED_REDUCE_COUNT 3

// Mip 0 plus one mip per shared reduction.
#define OUTPUT_MIP_COUNT (SHARED_REDUCE_COUNT + 1)

// Thread group is a square tile of (1 << SHARED_REDUCE_COUNT) pixels per side.
#define THREADGROUP_TILE_SIZE (1 << SHARED_REDUCE_COUNT)
#define THREADGROUP_TOTALSIZE (THREADGROUP_TILE_SIZE * THREADGROUP_TILE_SIZE)

// Layout of the gather input / output mips, driven by the permutation dimensions.
#if CONFIG_DOF_ALPHA
	#define CONFIG_GATHER_INPUT_LAYOUT (GATHER_INPUT_LAYOUT_RGB_ALPHA_COC)
#elif DIM_RGB_COLOR_BUFFER
	#define CONFIG_GATHER_INPUT_LAYOUT (GATHER_INPUT_LAYOUT_RGB_SEPARATE_COC)
#else
	#define CONFIG_GATHER_INPUT_LAYOUT (GATHER_INPUT_LAYOUT_RGB_COC)
#endif

// Whether the mip chain reduction is done with wave broadcast instead of LDS.
#define CONFIG_WAVE_BROADCAST_REDUCTION (PLATFORM_SUPPORTS_WAVE_BROADCAST)


// Selects the method (SCATTER_ALLOC_METHOD_*) used to find which scattering groups of the
// thread group have something to scatter.
#if COMPILER_SUPPORTS_WAVE_BIT_ORAND && (COMPILER_PSSL || XBOXONE_PROFILE)
	// GCN only optimisation
	#define SCATTER_ALLOC_METHOD (SCATTER_ALLOC_METHOD_WAVE)

#elif COMPILER_HLSLCC
	// Compiler does not like InterlockedOr().
	#define SCATTER_ALLOC_METHOD (SCATTER_ALLOC_METHOD_LDS)

#else
	#define SCATTER_ALLOC_METHOD (SCATTER_ALLOC_METHOD_ATOMIC)

#endif
|
|
|
|
|
|
|
|
//------------------------------------------------------- PARAMETERS

// Viewport rectangle; .zw is used as the far bound when measuring the distance to the viewport border.
uint4 ViewportRect;

// .zw is multiplied with the bokeh screen position to get a [0;1] position in ComputeVignetteData().
float4 ScatteringViewportSize;

// Scale applied to the thread's pixel position to get the bokeh center screen position.
float ScatteringScaling;

// Number of scattering groups the draw lists can hold; groups allocated past it are not scattered.
uint MaxScatteringGroupCount;

// Upper clamp applied to the UV used to sample the gather input.
float2 MaxInputBufferUV;

// Samples whose scatter CoC radius magnitude is not above this value are not scattered.
float MinScatteringCocRadius;

// Multiplier converting a CoC radius from the preprocessing basis to the processing basis.
float PreProcessingToProcessingCocRadiusFactor;

// Threshold of ApplyBokehGatherDistinctionFilter(); 0 disables the filter.
float BokehGatherDistinctionLimit;

// Lens / barrel description consumed by ComputeVignetteData().
float2 SensorSize;
float Aperture;
float LensImageDistance;
float BarrelRadius;
// Vignette data is only written to the scatter draw lists when this is >= 0.
float BarrelLength;
uint4 MatteBoxPlanes[MAX_MATTE_BOX_FLAGS];

// Full resolution gather input; GatherInputSize.zw converts a pixel position to a buffer UV.
float4 GatherInputSize;
Texture2D GatherInput_SceneColor;
// Only sampled when CONFIG_DOF_ALPHA is set.
Texture2D GatherInput_SeparateCoc;

// Buffer used for the neighborhood comparison of the hybrid scattering; .zw is passed as its inverse size.
float4 QuarterResGatherInputSize;
Texture2D QuarterResGatherInput_SceneColor;
|
|
|
|
//------------------------------------------------------- OUTPUTS

// One scene color UAV per output mip level, written by OutputMipLevel().
RWTexture2D<float4>	OutputMips_0_SceneColor;
RWTexture2D<float4>	OutputMips_1_SceneColor;
RWTexture2D<float4>	OutputMips_2_SceneColor;
RWTexture2D<float4>	OutputMips_3_SceneColor;

// Layouts that do not store the CoC radius in the scene color's alpha channel output it separately.
#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC || CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_SEPARATE_COC

RWTexture2D<float>	OutputMips_0_SeparateCoc;
RWTexture2D<float>	OutputMips_1_SeparateCoc;
RWTexture2D<float>	OutputMips_2_SeparateCoc;
RWTexture2D<float>	OutputMips_3_SeparateCoc;

#endif

#if DIM_HYBRID_SCATTER_FGD || DIM_HYBRID_SCATTER_BGD
// Indirect draw parameters; ReduceCS atomically adds the number of scattering groups to
// element [1 + N * SCATTERING_INDIRECT_PARAMETER_SIZE] (N = 0 for foreground, 1 for background).
RWBuffer<uint> OutScatterDrawIndirectParameters;

// Per scattering group data consumed by the scatter draws, SCATTER_DRAW_LIST_GROUP_STRIDE entries per group.
RWStructuredBuffer<float4> OutForegroundScatterDrawList;
RWStructuredBuffer<float4> OutBackgroundScatterDrawList;
#endif
|
|
|
|
|
|
//------------------------------------------------------- LDS

// Bit mask of the scattering groups of the thread group that have something to scatter.
#if SCATTER_ALLOC_METHOD == SCATTER_ALLOC_METHOD_ATOMIC
	// Single mask, filled with InterlockedOr().
	groupshared uint SharedForegroundScatterGroupMask;
	groupshared uint SharedBackgroundScatterGroupMask;

#elif SCATTER_ALLOC_METHOD == SCATTER_ALLOC_METHOD_LDS
	// One mask per thread, ORed together by every thread after a barrier.
	groupshared uint SharedForegroundScatterGroupMask[THREADGROUP_TOTALSIZE];
	groupshared uint SharedBackgroundScatterGroupMask[THREADGROUP_TOTALSIZE];

#endif

// Receives the value of the global scattering group counter before this thread group's allocation,
// i.e. the index of the first scattering group allocated by this thread group.
groupshared uint SharedForegroundAtomic;
groupshared uint SharedBackgroundAtomic;

// Storage of the LDS based mip chain reduction: color, with the CoC radius in alpha unless stored separately.
groupshared float4 GroupSharedArray[THREADGROUP_TOTALSIZE];
#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC
	// CoC radius, when the alpha channel of GroupSharedArray holds the color's alpha.
	groupshared float GroupSharedArray2[THREADGROUP_TOTALSIZE];
#endif
|
|
|
|
|
|
//------------------------------------------------------- FUNCTIONS
|
|
|
|
// Reduce using GroupSharedArray: downsamples 4 LDS entries into one.
//
// DownsampleParams: parameters forwarded to DownsampleSceneColorWithCoc().
// OutId:            LDS entry that receives the result; also the first of the 4 entries that are read.
// ReduceBankSize:   stride between the 4 entries that are read.
void ReduceOperator(FCocDownsampleParams DownsampleParams, uint OutId, uint ReduceBankSize)
{
	// Gather the 4 input samples. Each sample is on a bank of size <ReduceBankSize> for coherent LDS memory access.
	float4 Colors[4];
	float CocRadii[4];
	UNROLL
	for (uint i = 0; i < 4; i++)
	{
		uint InSharedId = OutId + i * ReduceBankSize;

		#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC
		{
			Colors[i] = GroupSharedArray[InSharedId];
			CocRadii[i] = GroupSharedArray2[InSharedId];
		}
		#else
		{
			// The CoC radius is stored in the alpha channel.
			Colors[i] = GroupSharedArray[InSharedId];
			CocRadii[i] = Colors[i].a;
		}
		#endif
	}

	// Downsample the 4 samples to one according to Coc.
	float4 OutColor;
	float OutCocRadius;
	DownsampleSceneColorWithCoc(DownsampleParams, Colors, CocRadii, OutColor, OutCocRadius);

	// Technically need a barrier, but in practice with a warp size >= 32, no need.
	// Note: as long as every caller has OutId < ReduceBankSize, the entry written below is
	// only ever read by the thread that writes it (the i == 0 sample above).
	// GroupMemoryBarrierWithGroupSync();

	// Output to shared memory.
	#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC
	{
		GroupSharedArray[OutId] = OutColor;
		GroupSharedArray2[OutId] = OutCocRadius;
	}
	#else
	{
		GroupSharedArray[OutId] = float4(OutColor.rgb, OutCocRadius);
	}
	#endif
}
|
|
|
|
// Output buffer: writes one pixel of the given mip level to the matching OutputMips_* UAV(s).
//
// MipLevel:    mip to write; levels >= DIM_REDUCE_MIP_COUNT are compiled out and silently ignored.
// MipPixelPos: pixel coordinate within that mip.
// Color:       color to output; its alpha is only output with the RGB_ALPHA_COC layout.
// CocRadius:   CoC radius to output, in alpha or in the separate CoC texture depending on the layout.
void OutputMipLevel(const uint MipLevel, uint2 MipPixelPos, float4 Color, float CocRadius)
{
	#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_COC
		float4 OutputSceneColor = float4(Color.rgb, CocRadius);
	#elif CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_SEPARATE_COC
		float4 OutputSceneColor = float4(Color.rgb, 0);
		float OutputSeparateCoc = CocRadius;
	#else
		float4 OutputSceneColor = Color;
		float OutputSeparateCoc = CocRadius;
	#endif

	// This is ugly, but resolved at compile time: the UAVs can not be indexed dynamically,
	// so the mip level selects one of them through an if / else if chain.
	#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_COC
	{
		if (MipLevel == 0)
		{
			OutputMips_0_SceneColor[MipPixelPos] = OutputSceneColor;
		}
		#if DIM_REDUCE_MIP_COUNT > 1
		else if (MipLevel == 1)
		{
			OutputMips_1_SceneColor[MipPixelPos] = OutputSceneColor;
		}
		#if DIM_REDUCE_MIP_COUNT > 2
		else if (MipLevel == 2)
		{
			OutputMips_2_SceneColor[MipPixelPos] = OutputSceneColor;
		}
		#if DIM_REDUCE_MIP_COUNT > 3
		else if (MipLevel == 3)
		{
			OutputMips_3_SceneColor[MipPixelPos] = OutputSceneColor;
		}
		#endif
		#endif
		#endif
	}
	#elif CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC || CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_SEPARATE_COC
	{
		if (MipLevel == 0)
		{
			OutputMips_0_SceneColor[MipPixelPos] = OutputSceneColor;
			OutputMips_0_SeparateCoc[MipPixelPos] = OutputSeparateCoc;
		}
		#if DIM_REDUCE_MIP_COUNT > 1
		else if (MipLevel == 1)
		{
			OutputMips_1_SceneColor[MipPixelPos] = OutputSceneColor;
			OutputMips_1_SeparateCoc[MipPixelPos] = OutputSeparateCoc;
		}
		#if DIM_REDUCE_MIP_COUNT > 2
		else if (MipLevel == 2)
		{
			OutputMips_2_SceneColor[MipPixelPos] = OutputSceneColor;
			OutputMips_2_SeparateCoc[MipPixelPos] = OutputSeparateCoc;
		}
		#if DIM_REDUCE_MIP_COUNT > 3
		else if (MipLevel == 3)
		{
			OutputMips_3_SceneColor[MipPixelPos] = OutputSceneColor;
			OutputMips_3_SeparateCoc[MipPixelPos] = OutputSeparateCoc;
		}
		#endif
		#endif
		#endif
	}
	#else
		#error Unknown gather input layout.
	#endif
}
|
|
|
|
|
|
//------------------------------------------------------- ENTRY POINT
|
|
|
|
// Pixels that are bright and have a large CoC should be scattered instead of gathered, to avoid gather noise.
// If they aren't, e.g. due to performance reasons, we may prefer to cull them instead of gathering them anyway.
// This will bias the image, as bokeh may appear slightly darker, but helps clean up noise in large bokeh.
//
// CocRadius:          CoC radius of the sample; only its magnitude is used.
// Color:              color of the sample; only its rgb luma is used.
// FrameExposureScale: multiplier applied to the luma before comparing against the limit.
//
// Returns a multiplier in [0;1] for the gathered color: 1 keeps the sample, 0 culls it.
// Always returns 1 when BokehGatherDistinctionLimit is 0.
float ApplyBokehGatherDistinctionFilter(float CocRadius, float4 Color, float FrameExposureScale)
{
	// A limit of exactly 0 disables the filter.
	if (BokehGatherDistinctionLimit == 0)
	{
		return 1;
	}

	// see also ComputeLuminanceOnlyBasedScatterFactor
	float PerceivedLuma = Luma4(Color.rgb) * FrameExposureScale;
	float BokehDistinction = abs(CocRadius) * PerceivedLuma;

	// Fades from 1 to 0 as the distinction goes from (limit - 200) to (limit + 200).
	// Written as 1 - smoothstep(lo, hi, x) rather than smoothstep(hi, lo, x): both are the same
	// curve, but smoothstep() with edge0 > edge1 is undefined on some shading languages.
	const float FadeHalfWidth = 200;
	return 1 - smoothstep(BokehGatherDistinctionLimit - FadeHalfWidth, BokehGatherDistinctionLimit + FadeHalfWidth, BokehDistinction);
}
|
|
|
|
// Computes the vignette data (projected barrel and matte box flags) stored in the scatter draw lists.
//
// DispatchThreadId: pixel position of the thread; the low bit of each coordinate is dropped, so the
//                   4 pixels of a 2x2 quad share the same bokeh center.
// CocRadius:        CoC radius forwarded to ProjectAndPackCylinder().
// Output:           receives the packed data. Only entries 0 and 1 are written, and matte box flags
//                   0 to 2 are read - presumably VIGNETTE_DATA_PER_PIXEL == 2 and
//                   MAX_MATTE_BOX_FLAGS >= 3; TODO confirm against DOFVignetteCommon.ush.
void ComputeVignetteData(uint2 DispatchThreadId, float CocRadius, out float4 Output[VIGNETTE_DATA_PER_PIXEL])
{
	// Center of the 2x2 quad's top left pixel.
	const float2 DrawPosition = (DispatchThreadId & 0xFFFFFFFE) + 0.5;
	const float2 BokehCenterScreenPos = ScatteringScaling * DrawPosition;
	// Remap from [0;1] to [-1;1].
	const float2 BokehCenterClipPos = (BokehCenterScreenPos * ScatteringViewportSize.zw) * 2.0 - 1.0;
	const float3 ObjectDirection = normalize(-float3(BokehCenterClipPos * (SensorSize*0.5), -LensImageDistance));

	const float Focus = View.DepthOfFieldFocalDistance;
	const float2 Barrel = asfloat(ProjectAndPackCylinder(
		CocRadius,
		BokehCenterClipPos,
		BarrelRadius,
		BarrelLength,
		SensorSize,
		Focus,
		Aperture,
		CocInfinityRadius,
		LensImageDistance));

	float2 ProjectedMatteBoxFlags[MAX_MATTE_BOX_FLAGS];
	UNROLL
	for (int Index = 0; Index < MAX_MATTE_BOX_FLAGS; ++Index)
	{
		ProjectedMatteBoxFlags[Index] = asfloat(ProjectAndPackMatteBoxFlag(ObjectDirection, BarrelRadius, BarrelLength, MatteBoxPlanes[Index]));
	}

	// Pack two float2 per output entry.
	Output[0] = float4(Barrel, ProjectedMatteBoxFlags[0]);
	Output[1] = float4(ProjectedMatteBoxFlags[1], ProjectedMatteBoxFlags[2]);
}
|
|
|
|
// Reduce pass entry point. Each thread handles one pixel of the gather input and:
//  1. fetches its color and CoC radius;
//  2. with hybrid scattering enabled, decides whether the pixel is scattered, allocates scattering
//     groups in the scatter draw lists and removes the scattered intensity from the gathered color;
//  3. outputs mip 0 (unless DIM_DISABLE_OUTPUT_MIP0) and the following DIM_REDUCE_MIP_COUNT - 1 mips,
//     reduced either through wave broadcast or through LDS.
//
// GroupId:          thread group coordinate; each group covers a THREADGROUP_TILE_SIZE^2 pixel tile.
// GroupThreadIndex: flattened index of the thread within its group.
[numthreads(THREADGROUP_TILE_SIZE, THREADGROUP_TILE_SIZE, 1)]
void ReduceCS(
	uint2 GroupId : SV_GroupID,
	uint GroupThreadIndex : SV_GroupIndex)
{
	// Init LDS.
	#if (DIM_HYBRID_SCATTER_FGD || DIM_HYBRID_SCATTER_BGD) && SCATTER_ALLOC_METHOD == SCATTER_ALLOC_METHOD_ATOMIC
	{
		SharedForegroundScatterGroupMask = 0;
		SharedBackgroundScatterGroupMask = 0;

		GroupMemoryBarrierWithGroupSync();
	}
	#endif

	// Gets group thread id to be fully LDS coherent.
	#if CONFIG_WAVE_BROADCAST_REDUCTION
		// Row major layout: bits [0;2] of the thread index are X, bits [3;5] are Y.
		uint2 GroupThreadId = uint2(GroupThreadIndex % THREADGROUP_TILE_SIZE, GroupThreadIndex / THREADGROUP_TILE_SIZE);

		// A scattering group is a 2x2 pixel quad: the upper two bits of X and Y select the group...
		uint ScatteringGroupIndex = (
			((GroupThreadIndex & 0x06) >> 1) |
			((GroupThreadIndex & 0x30) >> 2));
		// ...and the lowest bit of X and Y select the thread within the group.
		uint ScatteringGroupThreadIndex = (
			( GroupThreadIndex & 0x1) |
			((GroupThreadIndex & 0x8) >> 2));

	#else
		uint2 GroupThreadId = InitialTilePixelPositionForReduction2x2(SHARED_REDUCE_COUNT, GroupThreadIndex);

		// 16 scattering groups of 4 threads.
		uint ScatteringGroupIndex = GroupThreadIndex & 0xF;
		uint ScatteringGroupThreadIndex = GroupThreadIndex >> 4;

	#endif

	uint2 DispatchThreadId = THREADGROUP_TILE_SIZE * GroupId + GroupThreadId;

	float2 BufferUV = (DispatchThreadId + 0.5) * GatherInputSize.zw;

	// Clamp the UV so that threads outside of the input fetch its last valid pixels.
	if (true)
	{
		BufferUV = min(BufferUV, MaxInputBufferUV);
	}

	// Fetch scene color.
	float4 GatheredColor = 0;
	float CocRadius = 0;
	#if CONFIG_DOF_ALPHA
	{
		GatheredColor = GatherInput_SceneColor.SampleLevel(GlobalPointClampedSampler, BufferUV, 0);
		CocRadius = GatherInput_SeparateCoc.SampleLevel(GlobalPointClampedSampler, BufferUV, 0).r;

		// Convert from translucency to opacity.
		#if !DIM_DISABLE_OUTPUT_MIP0
			GatheredColor.a = 1.0 - GatheredColor.a;
		#endif
	}
	#else
	{
		// The CoC radius is stored in the alpha channel.
		GatheredColor = GatherInput_SceneColor.SampleLevel(GlobalPointClampedSampler, BufferUV, 0);
		CocRadius = GatheredColor.a;
	}
	#endif

	// Sample the frame exposure to SGPR.
	float FrameExposureScale = ToScalarMemory(EyeAdaptationLookup() * View.OneOverPreExposure);

	// Indirect dispatch
	#if DIM_HYBRID_SCATTER_FGD || DIM_HYBRID_SCATTER_BGD
	{
		const uint IndirectParameterSize = SCATTERING_INDIRECT_PARAMETER_SIZE;

		// Convert Coc radius from preprocessing basis to processing basis.
		float ScatterCocRadius = CocRadius * PreProcessingToProcessingCocRadiusFactor;

		// Decides whether should hybrid scatter or not.
		// It is fine to do it for sample outside viewport UV because they are discarded when the bokeh
		// is getting close to viewport edge.
		float ScatterFactor;
		{
			FHybridScatterInputs Parameters;
			Parameters.NeighborhoodComparisonBuffer = QuarterResGatherInput_SceneColor;
			Parameters.NeighborhoodComparisonBufferInvSize = QuarterResGatherInputSize.zw;
			Parameters.NeighborhoodComparisonMaxBufferUV = MaxInputBufferUV;

			Parameters.FrameExposureScale = FrameExposureScale;

			Parameters.Color = GatheredColor;
			Parameters.CocRadius = ScatterCocRadius;
			Parameters.BufferUV = BufferUV;

			ScatterFactor = ComputeLuminanceOnlyBasedScatterFactor(Parameters);

			// Do not scatter if the sample becomes really small.
			if (1)
			{
				#if DIM_HYBRID_SCATTER_FGD && !DIM_HYBRID_SCATTER_BGD
					// Foreground only: negated radius, so non-negative radii never scatter.
					ScatterFactor *= saturate(-ScatterCocRadius - MinScatteringCocRadius);

				#elif !DIM_HYBRID_SCATTER_FGD && DIM_HYBRID_SCATTER_BGD
					// Background only: non-positive radii never scatter.
					ScatterFactor *= saturate(ScatterCocRadius - MinScatteringCocRadius);

				#else
					ScatterFactor *= saturate(abs(ScatterCocRadius) - MinScatteringCocRadius);

				#endif
			}

			// Do not scatter if the Coc starts to draw outside the viewport.
			if (1)
			{
				float2 SvPosition = (DispatchThreadId + 0.5);

				float ClosestBorderDistance = float(min(
					min(SvPosition.x, SvPosition.y),
					min(ViewportRect.z - SvPosition.x, ViewportRect.w - SvPosition.y)));

				ScatterFactor *= saturate(ClosestBorderDistance - abs(ScatterCocRadius));
			}

			// Compare this sample with neighbor if worth it.
			BRANCH
			if (ScatterFactor > 0.5)
			{
				ScatterFactor *= ComputeNeighborBasedScatterFactor(Parameters);
			}

			// Sharpen to remove sprites that are contributing not that much:
			// remaps [0.5;1] to [0;1], anything below 0.5 becomes 0.
			if (1)
			{
				const float Sharpen = 2;
				ScatterFactor = saturate(ScatterFactor * Sharpen + (1 - Sharpen));
			}
		}

		// Bit of this thread's scattering group in the thread group's scatter masks.
		uint ScatteringGroupMask = 1u << ScatteringGroupIndex;

		const float ScatterFactorThreshold = 0.01;
		bool bScatterThisColor = ScatterFactor > ScatterFactorThreshold;
		bool bIsForeground = IsForeground(CocRadius);

		if (!bScatterThisColor)
		{
			// NOTE(review): this passes View.OneOverPreExposure where the callee's parameter is named
			// FrameExposureScale, unlike Parameters.FrameExposureScale above - confirm this is intended.
			GatheredColor *= ApplyBokehGatherDistinctionFilter(CocRadius, GatheredColor, View.OneOverPreExposure);
		}

		// Whether scattering group actually scatter or not.
		uint ForegroundScatterGroupMask = 0;
		uint BackgroundScatterGroupMask = 0;
		#if SCATTER_ALLOC_METHOD == SCATTER_ALLOC_METHOD_WAVE
		{
			#if DIM_HYBRID_SCATTER_FGD
				ForegroundScatterGroupMask = WaveActiveBitOr((bScatterThisColor && bIsForeground) ? ScatteringGroupMask : 0);
			#endif

			#if DIM_HYBRID_SCATTER_BGD
				BackgroundScatterGroupMask = WaveActiveBitOr((bScatterThisColor && !bIsForeground) ? ScatteringGroupMask : 0);
			#endif
		}
		#elif SCATTER_ALLOC_METHOD == SCATTER_ALLOC_METHOD_ATOMIC
		// Atomic or whether scattering group actually scatter or not.
		{
			BRANCH
			if (DIM_HYBRID_SCATTER_FGD && bIsForeground && bScatterThisColor)
			{
				uint Unused;
				InterlockedOr(SharedForegroundScatterGroupMask, ScatteringGroupMask, Unused);
			}

			BRANCH
			if (DIM_HYBRID_SCATTER_BGD && !bIsForeground && bScatterThisColor)
			{
				uint Unused;
				InterlockedOr(SharedBackgroundScatterGroupMask, ScatteringGroupMask, Unused);
			}

			GroupMemoryBarrierWithGroupSync();

			#if DIM_HYBRID_SCATTER_FGD
				ForegroundScatterGroupMask = SharedForegroundScatterGroupMask;
			#endif
			#if DIM_HYBRID_SCATTER_BGD
				BackgroundScatterGroupMask = SharedBackgroundScatterGroupMask;
			#endif
		}
		#elif SCATTER_ALLOC_METHOD == SCATTER_ALLOC_METHOD_LDS
		{
			// Every thread publishes its own mask...
			#if DIM_HYBRID_SCATTER_FGD
				SharedForegroundScatterGroupMask[GroupThreadIndex] = (
					(bScatterThisColor && bIsForeground) ? ScatteringGroupMask : 0);
			#endif

			#if DIM_HYBRID_SCATTER_BGD
				SharedBackgroundScatterGroupMask[GroupThreadIndex] = (
					(bScatterThisColor && !bIsForeground) ? ScatteringGroupMask : 0);
			#endif

			GroupMemoryBarrierWithGroupSync();

			// ...then ORs the masks of all the threads of the group.
			UNROLL
			for (uint i = 0; i < THREADGROUP_TOTALSIZE; i++)
			{
				#if DIM_HYBRID_SCATTER_FGD
					ForegroundScatterGroupMask |= SharedForegroundScatterGroupMask[i];
				#endif

				#if DIM_HYBRID_SCATTER_BGD
					BackgroundScatterGroupMask |= SharedBackgroundScatterGroupMask[i];
				#endif
			}
		}
		#else
			#error Unknown scatter group allocation method.
		#endif // SCATTER_ALLOC_METHOD

		// Allocate globally: the first thread reserves one slot per scattering group that scatters,
		// and stores the index of the first reserved slot in LDS for the other threads.
		BRANCH
		if (GroupThreadIndex == 0)
		{
			BRANCH
			if (DIM_HYBRID_SCATTER_FGD && ForegroundScatterGroupMask)
			{
				uint ScatteringGroupCount = countbits(ForegroundScatterGroupMask);
				InterlockedAdd(OutScatterDrawIndirectParameters[1 + 0 * IndirectParameterSize], ScatteringGroupCount, SharedForegroundAtomic);
			}

			BRANCH
			if (DIM_HYBRID_SCATTER_BGD && BackgroundScatterGroupMask)
			{
				uint ScatteringGroupCount = countbits(BackgroundScatterGroupMask);
				InterlockedAdd(OutScatterDrawIndirectParameters[1 + 1 * IndirectParameterSize], ScatteringGroupCount, SharedBackgroundAtomic);
			}
		}

		// Make Shared*Atomic visible to the whole group. The barrier is skipped when a single wave
		// covers the whole thread group.
		#if COMPILER_SUPPORTS_WAVE_ONCE
			if (WaveGetLaneCount() < THREADGROUP_TILE_SIZE * THREADGROUP_TILE_SIZE)
			{
				GroupMemoryBarrierWithGroupSync();
			}
		#else
			GroupMemoryBarrierWithGroupSync();
		#endif

		// Intensity loss factor caused by the area of confusion.
		// TODO: no need min()
		float IntensityLossFactor = min(1, SafeRcp(PI * CocRadius * CocRadius, 1.0) * CocSqueeze);

		// Enqueue scattering group in foreground scatter draw list.
		BRANCH
		if (DIM_HYBRID_SCATTER_FGD && (ScatteringGroupMask & ForegroundScatterGroupMask))
		{
			// Number of scattering groups that scatter and come before this one in the mask.
			uint GroupScatteringOffset = countbits(ForegroundScatterGroupMask & (ScatteringGroupMask - 1));
			uint IndirectDrawIndex = SharedForegroundAtomic + GroupScatteringOffset;

			BRANCH
			if (IndirectDrawIndex < MaxScatteringGroupCount)
			{
				// First entry of the group: position, written once per group.
				BRANCH
				if (ScatteringGroupThreadIndex == 0)
				{
					OutForegroundScatterDrawList[SCATTER_DRAW_LIST_GROUP_STRIDE * IndirectDrawIndex + 0] = float4(DispatchThreadId + 0.5, 0.0, 0.0);
				}

				// Background samples of a foreground scattering group are written with a black color.
				float4 ScatterColor = GatheredColor * (ScatterFactor * IntensityLossFactor * (IsForeground(ScatterCocRadius) ? 1 : 0));
				uint Offset = SCATTER_DRAW_LIST_GROUP_STRIDE * IndirectDrawIndex + 1 + (ScatteringGroupThreadIndex * SCATTER_DRAW_LIST_DATA_PER_PIXEL);
				OutForegroundScatterDrawList[Offset] = float4(ScatterColor.rgb, abs(ScatterCocRadius));

				if (BarrelLength >= 0)
				{
					float4 VignetteData[VIGNETTE_DATA_PER_PIXEL];
					ComputeVignetteData(DispatchThreadId, ScatterCocRadius, VignetteData);

					UNROLL
					for (uint Index = 0; Index < VIGNETTE_DATA_PER_PIXEL; ++Index)
					{
						OutForegroundScatterDrawList[Offset + 1 + Index] = VignetteData[Index];
					}
				}
			}
			else
			{
				// The OutScatterDrawList was full, so this scattering group is not scattered: force
				// ScatterFactor = 0 so that no intensity is removed from the gathered color below.
				ScatterFactor = 0;
			}
		}

		// Enqueue scattering group in background scatter draw list.
		BRANCH
		if (DIM_HYBRID_SCATTER_BGD && (ScatteringGroupMask & BackgroundScatterGroupMask))
		{
			// Number of scattering groups that scatter and come before this one in the mask.
			uint GroupScatteringOffset = countbits(BackgroundScatterGroupMask & (ScatteringGroupMask - 1));
			uint IndirectDrawIndex = SharedBackgroundAtomic + GroupScatteringOffset;

			BRANCH
			if (IndirectDrawIndex < MaxScatteringGroupCount)
			{
				// First entry of the group: position, written once per group.
				BRANCH
				if (ScatteringGroupThreadIndex == 0)
				{
					OutBackgroundScatterDrawList[SCATTER_DRAW_LIST_GROUP_STRIDE * IndirectDrawIndex + 0] = float4(DispatchThreadId + 0.5, 0.0, 0.0);
				}

				// Foreground samples of a background scattering group are written with a black color.
				float4 ScatterColor = GatheredColor * (ScatterFactor * IntensityLossFactor * (IsForeground(ScatterCocRadius) ? 0 : 1));
				uint Offset = SCATTER_DRAW_LIST_GROUP_STRIDE * IndirectDrawIndex + 1 + (ScatteringGroupThreadIndex * SCATTER_DRAW_LIST_DATA_PER_PIXEL);
				OutBackgroundScatterDrawList[Offset] = float4(ScatterColor.rgb, abs(ScatterCocRadius));

				if (BarrelLength >= 0)
				{
					float4 VignetteData[VIGNETTE_DATA_PER_PIXEL];
					ComputeVignetteData(DispatchThreadId, ScatterCocRadius, VignetteData);

					UNROLL
					for (uint Index = 0; Index < VIGNETTE_DATA_PER_PIXEL; ++Index)
					{
						OutBackgroundScatterDrawList[Offset + 1 + Index] = VignetteData[Index];
					}
				}
			}
			else
			{
				// The OutScatterDrawList was full, so this scattering group is not scattered: force
				// ScatterFactor = 0 so that no intensity is removed from the gathered color below.
				ScatterFactor = 0;
			}
		}

		// Remove color intensity that has been scattered.
		GatheredColor.rgb *= (1 - ScatterFactor);
	}
	#endif // DIM_HYBRID_SCATTER_FGD || DIM_HYBRID_SCATTER_BGD

	// Pixel coordinate this thread outputs to; halved for each following mip level.
	// NOTE(review): the comment originally here read "for gathering pass to avoid having to clamp UV
	// in gathering pass." and looks truncated.
	uint2 OutputPixelId = DispatchThreadId;

	// Output mip 0 in output buffer that is always a multiple of group size. Unless reducing lower mips.
	#if !DIM_DISABLE_OUTPUT_MIP0
	{
		OutputMipLevel(/* MipLevel = */ 0, OutputPixelId, GatheredColor, CocRadius);
	}
	#endif

	FCocDownsampleParams DownsampleParams;
	DownsampleParams.CocRadiusMultiplier = 1.0;
	DownsampleParams.FrameExposureScale = FrameExposureScale;
	DownsampleParams.bDoColorBasedWeighting = false;

	#if CONFIG_WAVE_BROADCAST_REDUCTION
	{
		UNROLL
		for (uint i = 0; i < (DIM_REDUCE_MIP_COUNT - 1); i++)
		{
			// Multiplier that needs to be applied on Coc computation to be resolution independent.
			DownsampleParams.CocRadiusMultiplier = 0.5 / float(1u << (i));

			// Scale of the reduction within the 8x8 tile.
			const uint ReductionScale = 1u << i;

			// Chooses output CoC across lanes.
			float OutputCocRadius = DownsampleCoc_Wave8x8(CocRadius, ReductionScale);

			// Compute the sample's bilateral weight.
			float SampleBilateralWeight = ComputeDownsamplingBilateralWeight(DownsampleParams, OutputCocRadius, CocRadius, GatheredColor.rgb);

			// Weight sum the color across the lanes.
			GatheredColor.r = Sum2x2WithinWave8x8(GatheredColor.r * SampleBilateralWeight, ReductionScale);
			GatheredColor.g = Sum2x2WithinWave8x8(GatheredColor.g * SampleBilateralWeight, ReductionScale);
			GatheredColor.b = Sum2x2WithinWave8x8(GatheredColor.b * SampleBilateralWeight, ReductionScale);
			GatheredColor.a = Sum2x2WithinWave8x8(GatheredColor.a * SampleBilateralWeight, ReductionScale);

			// Compute normalising weight across lanes.
			float InvTotalWeight = rcp(Sum2x2WithinWave8x8(SampleBilateralWeight, ReductionScale));

			// Normalize the color.
			GatheredColor *= InvTotalWeight;
			CocRadius = OutputCocRadius;

			// Output mip map: only the thread whose X and Y are both multiples of (2 << i) writes.
			OutputPixelId = OutputPixelId >> 1;
			BRANCH
			if (((DispatchThreadId.x | DispatchThreadId.y) & ((2u << i) - 1)) == 0)
			{
				OutputMipLevel(/* MipLevel = */ i + 1, OutputPixelId, GatheredColor, CocRadius);
			}
		}
	}
	#else // !CONFIG_WAVE_BROADCAST_REDUCTION
	{
		// Store color to LDS for reduction.
		#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC
		{
			GroupSharedArray[GroupThreadIndex] = GatheredColor;
			GroupSharedArray2[GroupThreadIndex] = CocRadius;
		}
		#else
		{
			GroupSharedArray[GroupThreadIndex] = float4(GatheredColor.rgb, CocRadius);
		}
		#endif

		// Output mip > 0 of the gathering pass.
		UNROLL
		for (uint i = 0; i < (DIM_REDUCE_MIP_COUNT - 1); i++)
		{
			// Size of the reduced tile, and number of threads that take part in this reduction.
			const uint TileSize = THREADGROUP_TILE_SIZE / (1u << (i + 1));
			const uint ReduceBankSize = TileSize * TileSize;

			// GroupSharedArray has been written before.
			GroupMemoryBarrierWithGroupSync();

			if (GroupThreadIndex < ReduceBankSize)
			{
				// Multiplier that needs to be applied on Coc computation to be resolution independent.
				DownsampleParams.CocRadiusMultiplier = 0.5 / float(1u << (i));

				// Reduce.
				ReduceOperator(DownsampleParams, GroupThreadIndex, ReduceBankSize);

				// Read LDS (compilers are generally smart here to actually keep the value in VGPR).
				#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC
					float4 Color = GroupSharedArray[GroupThreadIndex];
					float OutCocRadius = GroupSharedArray2[GroupThreadIndex];
				#else
					float4 Color = GroupSharedArray[GroupThreadIndex];
					float OutCocRadius = GroupSharedArray[GroupThreadIndex].a;
				#endif

				// Output mip map.
				OutputPixelId = OutputPixelId >> 1;
				OutputMipLevel(/* MipLevel = */ i + 1, OutputPixelId, Color, OutCocRadius);
			}
		}
	}
	#endif // !CONFIG_WAVE_BROADCAST_REDUCTION
}
|