Files
UnrealEngine/Engine/Shaders/Private/DiaphragmDOF/DOFGatherKernel.ush
2025-05-18 13:04:45 +08:00

1045 lines
35 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
DiaphragmDOF/DOFGatherKernel.ush: gather sampling kernel of Diaphragm DOF.
=============================================================================*/
#pragma once
#include "DOFGatherCommon.ush"
#include "DOFGatherAccumulator.ush"
//------------------------------------------------------- PARAMETERS
// Shader parameters read by the gathering kernel.
// Not referenced by the kernel code visible in this part of the file.
float4 ViewportSize;
// Size of the GatherInput buffer. .zw is used as the pixel -> UV multiplier (presumably 1 / .xy - TODO confirm against the C++ binding).
float4 GatherInputSize;
// Size in pixels of the viewport within GatherInput; used with GatherInputSize.zw to derive the max UV that can be sampled.
float2 GatherInputViewportSize;
// Gather input color. Depending on CONFIG_GATHER_INPUT_LAYOUT, the alpha channel is either the Coc radius or the color's alpha.
Texture2D GatherInput_SceneColor;
// Gather input whose red channel is the Coc radius for the RGB_ALPHA_COC and RGB_SEPARATE_COC layouts.
Texture2D GatherInput_SeparateCoc;
// Look up table sampled when bokeh simulation is enabled (DIM_BOKEH_SIMULATION != BOKEH_SIMULATION_DISABLED).
Texture2D BokehLUT;
// Petzval lens effect amount. 0 disables the effect; the sign selects which axis of the kernel gets scaled.
float Petzval;
// Exponent applied to the distance from the exclusion box to get the Petzval intensity.
float PetzvalFalloffPower;
// Extents of the Petzval exclusion box, compared against abs(ViewportUV * 2 - 1).
float2 PetzvalExclusionBoxExtents;
// Corner rounding of the exclusion box, as a fraction of its smallest extent.
float PetzvalExclusionBoxRadius;
//------------------------------------------------------- GATHERING KERNEL
// LARGEST_RINGS_FIRST controls the order in which the rings are processed by the constant density loops of
// GatherSamplesAndResolve(): non-zero iterates from the outermost ring inward, zero iterates from the innermost ring outward.
// Defaults to 0 when the including shader does not define it.
#ifndef LARGEST_RINGS_FIRST
#define LARGEST_RINGS_FIRST 0
#endif
// Computes where a sample of a ring sits on the unit circle (radius = 1).
//	RingId: index of the ring the sample belongs to.
//	RingSampleIteration: number of steps of the ring, one step covering PI / RingSampleIteration radians.
//	RingSampleId: index of the sample along the ring, in steps.
float2 GetDiskSampleOnUnitCirle(uint RingId, uint RingSampleIteration, uint RingSampleId)
{
	// Position of the sample along the ring, measured in steps.
	float RingStep = RingSampleId;

	// Odd rings are shifted by half a step so that the first samples of consecutive rings do not all line up on
	// the X axis. This increases the minimal distance between samples, which reduces the variance the post
	// filtering has to clean up.
	RingStep += (RingId & 1u) * 0.5;

	#if CONFIG_SLIGHT_RING_ROTATION
		// Additional per ring rotation.
		RingStep += (RingId + 1) * 0.2;
	#endif

	const float Angle = PI * RingStep / float(RingSampleIteration);
	return float2(cos(Angle), sin(Angle));
}
// Builds the 2x2 matrix that advances a position by one step (PI / RingSampleIteration radians) along a ring.
// GatherRingSamples() applies it as mul(Position, Matrix).
float2x2 GetSampleRotationMatrix(uint RingSampleIteration)
{
	const float StepAngle = PI / float(RingSampleIteration);
	const float CosStep = cos(StepAngle);
	const float SinStep = sin(StepAngle);

	// Rows are (cos, sin) and (-sin, cos).
	return float2x2(
		CosStep, SinStep,
		-SinStep, CosStep);
}
// Output of the fast accumulator, which only keeps a running sum of the gathered sample colors.
struct FFastAccumulatorOutput
{
// Accumulated Color. The kernel adds every fetched sample's color to it, and GatherSamplesAndResolve() divides it
// by the kernel's sample count once gathering is done.
float4 Color;
};
// Rescales what the fast accumulator has summed so far, so that previously gathered rings keep the right
// relative weight after the kernel's sampling density has changed.
//	FastAccumulator: accumulator to amend in place.
//	AccumulatedWeightMultiplier: factor applied to the accumulated color.
void AmendAccumulatedRingsWeight(inout FFastAccumulatorOutput FastAccumulator, const float AccumulatedWeightMultiplier)
{
	FastAccumulator.Color = FastAccumulator.Color * AccumulatedWeightMultiplier;
}
// State shared between the different functions of the kernel.
// GatherSamplesAndResolve() sets the compile time members, then UpdateRuntimeKernelScope() fills the run time ones.
struct FPrivateKernelSharedScope
{
// ------- compile time constant category.
// Whether GatherInput is fetched with the bilinear sampler (true) or with the point sampler (false).
bool bBilinearSampleInput;
// Whether the kernel has enough samples to do the entire convolution (used by slight out of focus).
bool bKenrelHasEnoughSamples;
// ------- run time category.
// UV coordinate of the kernel center in GatherInput.
float2 KernelCenterUV;
// Multiplier to transform sample count unit to sample position in pixels of GatherInput's mip level 0.
// Set at compile time to 1 when bKenrelHasEnoughSamples==true.
float SampleCountToSamplePosition;
// Sampling mip level for GatherInput.
float GatherInputMipLevel;
// GatherInput's max UVs.
// TODO: this is currently MipLevel dependent, but could save some VGPR by making it independent.
float2 GatherInputMaxUV;
// Multiplier to apply when computing sample intersection. Higher values sharpen, lower values feather.
float IntersectionMultiplier;
// Used to stretch sample positions for the Petzval lens effect. Left at 0 when Petzval == 0, in which case
// ApplyApproxPetzval() does not read it.
float2x2 PetzvalTransform;
};
// Computes how much a sample's Coc intersects the output pixel, from the sample's Coc radius and its distance
// to the kernel center.
// Returns a value in [0; 1], 0 meaning no intersection and 1 meaning full intersection.
float ComputeSampleIntersection(FPrivateKernelSharedScope KernelScope, float SampleCocRadius, float SampleDistance)
{
	// Only the magnitude of the Coc radius matters for the intersection.
	const float AbsCocRadius = abs(SampleCocRadius);

#if 1
	// Intersection of the output pixel's disk area with the sample's Coc.
	#if DIM_LAYER_PROCESSING == LAYER_PROCESSING_SLIGHT_OUT_OF_FOCUS
		// Slight out of focus uses a sharper intersection, like the Recombine pass, since it is being used for
		// the full res temporal stability clamping box. The aliasing this may introduce at half res is fine.
		const float Sharpness = 4.0;
	#else
		const float Sharpness = KernelScope.IntersectionMultiplier;
	#endif

	// Gives an intersection of 50% when the sample's Coc radius == SampleDistance, to be area conservative with
	// bilinear sampled gathering or hybrid scattering.
	const float HalfCoverageBias = 0.5;

	return saturate((AbsCocRadius - SampleDistance) * Sharpness + HalfCoverageBias);

#elif 0
	// Same as Circle DOF.
	// 0.5 because the Coc radius is at half res, and 0.5 so that the falloff is only the radius of a full res pixel.
	const float Sharpness = 0.5 * 0.5;

	// TODO: missing +0.5 to be energy conservative.
	return saturate((AbsCocRadius - SampleDistance) * Sharpness);

#else
	// Hard cut off.
	if (SampleDistance > AbsCocRadius)
	{
		return 0.0;
	}
	return 1.0;
#endif
}
// Returns the number of sample pairs to gather for a given ring id.
//	KernelSamplingDensityMode: one of the KERNEL_DENSITY_* modes.
//	RingId: index of the ring.
uint GetRingSamplingPairCount(const uint KernelSamplingDensityMode, uint RingId)
{
	const bool bIsHexaweb = (
		static_condition(KernelSamplingDensityMode == KERNEL_DENSITY_CONSTANT_HEXAWEB) ||
		static_condition(KernelSamplingDensityMode == KERNEL_DENSITY_LOWER_IN_CENTER_HEXAWEB));

	// Hexaweb kernels use 3 pairs per unit of ring distance. The other kernels use 4, which is carefully chosen
	// to give the exact number of samples of a square shaped ring (SquarePos).
	const uint PairsPerRingDistance = bIsHexaweb ? 3 : 4;

	return (RingId + 1) * PairsPerRingDistance;
}
// Returns the total number of samples of a kernel made of RingCount rings: the center sample, plus two samples
// for every pair GetRingSamplingPairCount() returns for each ring.
//	KernelSamplingDensityMode: one of the KERNEL_DENSITY_* modes.
//	RingCount: number of rings of the kernel.
uint GetKernelSampleCount(const uint KernelSamplingDensityMode, uint RingCount)
{
	const bool bIsHexaweb = (
		static_condition(KernelSamplingDensityMode == KERNEL_DENSITY_CONSTANT_HEXAWEB) ||
		static_condition(KernelSamplingDensityMode == KERNEL_DENSITY_LOWER_IN_CENTER_HEXAWEB));

	// Must stay in sync with the per ring multiplier of GetRingSamplingPairCount().
	const uint PairsPerRingDistance = bIsHexaweb ? 3 : 4;

	return 1 + PairsPerRingDistance * RingCount * (RingCount + 1);
}
// Fetches GatherInput at the given UV and unpacks the result into a FGatherSample.
//	KernelScope: provides the sampler selection, the mip level to sample and the max UV.
//	SampleUV: UV to sample in GatherInput; mirrored and/or clamped before the fetch.
// The returned sample has Distance = 0 and Intersection = 1; callers overwrite them when relevant.
FGatherSample FetchGatherSample(in FPrivateKernelSharedScope KernelScope, float2 SampleUV)
{
	#if CONFIG_MIRRORED_SAMPLER
	{
		// Mirror the UV around 0.
		SampleUV = max(SampleUV, -SampleUV);

		// Mirror the UV around the upper bound of the input.
		#if CONFIG_CLAMP_INPUT_BUFFER_UV
			SampleUV = min(SampleUV, 2.0 * KernelScope.GatherInputMaxUV - SampleUV);
		#else
			SampleUV = min(SampleUV, 2.0 - SampleUV);
		#endif
	}
	#endif

	// The UV is clamped to the max UV in every configuration, which also covers the non mirrored
	// CONFIG_CLAMP_INPUT_BUFFER_UV case.
	SampleUV = min(SampleUV, KernelScope.GatherInputMaxUV);

	const float MipLevel = KernelScope.GatherInputMipLevel;

	// Fetch both inputs with the sampler the kernel has been configured for.
	float4 SceneColorTexel = 0;
	float4 SeparateCocTexel = 0;
	if (KernelScope.bBilinearSampleInput)
	{
		SceneColorTexel = GatherInput_SceneColor.SampleLevel(GlobalBilinearClampedSampler, SampleUV, MipLevel);
		SeparateCocTexel = GatherInput_SeparateCoc.SampleLevel(GlobalBilinearClampedSampler, SampleUV, MipLevel);
	}
	else
	{
		SceneColorTexel = GatherInput_SceneColor.SampleLevel(GlobalPointClampedSampler, SampleUV, MipLevel);
		SeparateCocTexel = GatherInput_SeparateCoc.SampleLevel(GlobalPointClampedSampler, SampleUV, MipLevel);
	}

	// Unpack according to the layout of the gather input.
	FGatherSample Sample;
	Sample.Distance = 0.0;
	Sample.Intersection = 1.0;

	#if CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_COC
		// Coc radius packed in the alpha channel of the color.
		Sample.Color = float4(SceneColorTexel.rgb, 1);
		Sample.CocRadius = SceneColorTexel.a;
	#elif CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_ALPHA_COC
		// Full RGBA color, Coc radius in the separate texture.
		Sample.Color = SceneColorTexel;
		Sample.CocRadius = SeparateCocTexel.r;
	#elif CONFIG_GATHER_INPUT_LAYOUT == GATHER_INPUT_LAYOUT_RGB_SEPARATE_COC
		// RGB color, Coc radius in the separate texture.
		Sample.Color = float4(SceneColorTexel.rgb, 1);
		Sample.CocRadius = SeparateCocTexel.r;
	#else
		#error Unknown gather input layout.
	#endif

	return Sample;
}
// Updates the run time members of the kernel scope from its compile time members and the gather parameters.
//	GatherParameters: parameters of the gathering; RingCount, MaxRingCount, KernelRadius, Random, InputBufferCenterUV
//		and ViewportUV are read.
//	KernelScope: bKenrelHasEnoughSamples and bBilinearSampleInput must already be set; KernelCenterUV,
//		PetzvalTransform, SampleCountToSamplePosition, GatherInputMaxUV, GatherInputMipLevel and
//		IntersectionMultiplier are written.
// Called again every time the kernel radius changes, so that the scope follows the new sampling density.
void UpdateRuntimeKernelScope(
in FGatherInputParameters GatherParameters,
inout FPrivateKernelSharedScope KernelScope)
{
// Radius of a sample in the unit circle.
const float UnitSampleRadius = rcp(GatherParameters.RingCount + 0.5);
// Compute the mip level to sample.
#if CONFIG_MIP_LEVEL_METHOD == MIP_LEVEL_METHOD_INTERLEAVED_OFFSET
// Compute random uniformly distributed on disk (centered, diameter = 1).
float2 RandomCircle = (0.48 * sqrt(GatherParameters.Random[0])) * float2(cos(2 * PI * GatherParameters.Random[1]), sin(2 * PI * GatherParameters.Random[1]));
// Nearest mip level for the distance in pixels between two samples.
float MipLevel = floor(0.5 + log2(GatherParameters.KernelRadius * UnitSampleRadius));
#elif CONFIG_MIP_LEVEL_METHOD == MIP_LEVEL_METHOD_CONSERVATIVE_NO_RANDOM
// No random offset of the kernel center, and round the mip level up.
float2 RandomCircle = 0;
float MipLevel = ceil(log2(GatherParameters.KernelRadius * UnitSampleRadius));
#else
#error Unknown mip level technic.
#endif
// DEBUG: disable random offset.
#if DEBUG_NO_RANDOM_OFFSET
RandomCircle = 0;
#endif
// DEBUG: force to sample mip level that have same resolution as output.
#if DEBUG_FORCE_GATHER_HIGHEST_MIP_LEVEL
MipLevel = 0;
#endif
// When there is enough samples, can sample the MipLevel0 just fine.
if (static_condition(KernelScope.bKenrelHasEnoughSamples))
{
MipLevel = 0;
}
// Compute the randomising range of the kernel offset in pixels.
float KernelRandomRange = GatherParameters.KernelRadius * UnitSampleRadius;
if (static_condition(KernelScope.bBilinearSampleInput))
{
// With bilinear sampling, the range is reduced by the max ring count and the size of a texel of the sampled mip.
KernelRandomRange = max(GatherParameters.KernelRadius - GatherParameters.MaxRingCount - pow(2.0, uint(MipLevel)), 0.0) * UnitSampleRadius;
}
// No need to do kernel randomization when there is enough samples.
if (static_condition(KernelScope.bKenrelHasEnoughSamples))
{
KernelRandomRange = 0;
}
// Offset the kernel center by the random disk position, converted from pixels to UV.
KernelScope.KernelCenterUV = GatherParameters.InputBufferCenterUV + (RandomCircle * KernelRandomRange) * GatherInputSize.zw;
// Petzval transform of the kernel. Left at 0 when the effect is disabled.
// NOTE(review): this set up is not guarded by CONFIG_PETZVAL whereas ApplyApproxPetzval() is - presumably the
// compiler removes it when the transform ends up unused; confirm.
KernelScope.PetzvalTransform = 0;
if (Petzval != 0)
{
// Kernel center remapped with * 2 - 1 (to [-1; 1] assuming ViewportUV is in [0; 1] - TODO confirm).
const float2 BokehCenter = GatherParameters.ViewportUV * 2.0 - 1.0;
// Corner radius of the exclusion box.
const float Radius = PetzvalExclusionBoxRadius * min(PetzvalExclusionBoxExtents.x, PetzvalExclusionBoxExtents.y);
// Vector from the exclusion box shrunk by Radius to the kernel center; null when the center is inside of it.
const float2 BoxToBokeh = sign(BokehCenter) * max(abs(BokehCenter) - PetzvalExclusionBoxExtents + Radius, 0.0);
float BokehDistance = max(0.0, length(BoxToBokeh));
// Evaluates to 0 / 0 when BoxToBokeh is null. Normal is only read in the BokehDistance > 0 branch below, which
// can not be taken with a null BoxToBokeh as long as Radius is not negative.
const float2 Normal = BoxToBokeh / BokehDistance;
// Distance to the rounded exclusion box.
BokehDistance = max(0.0, BokehDistance - Radius);
if (BokehDistance > 0)
{
// Outside of the exclusion box: scale the kernel along one axis, more as the distance grows.
const float PetzvalIntensity = pow(BokehDistance, PetzvalFalloffPower);
const float PetzvalStretchFactor = rcp(1 + abs(Petzval) * PetzvalIntensity);
const float2 Tangent = float2(Normal.y, -Normal.x);
// Basis made of the direction away from the box and its perpendicular.
const float2x2 ToSagittalSpace = {Normal.x, Normal.y, Tangent.x, Tangent.y};
// A positive Petzval scales the sagittal axis, a negative one scales the tangential axis.
bool bMask = Petzval > 0;
const float2x2 Scale = {
bMask ? PetzvalStretchFactor : 1.0, // Sagittal
0.0, 0.0,
!bMask ? PetzvalStretchFactor : 1.0 // Tangential
};
KernelScope.PetzvalTransform = mul(mul(ToSagittalSpace, Scale), transpose(ToSagittalSpace));
}
else
{
// Inside of the exclusion box: the kernel is left untouched.
const float2x2 Identity = { 1, 0, 0, 1 };
KernelScope.PetzvalTransform = Identity;
}
}
KernelScope.SampleCountToSamplePosition = GatherParameters.KernelRadius * UnitSampleRadius;
// Size of a texel of the sampled mip level, in pixels of mip level 0.
float MipTexelSize = pow(2.0, uint(MipLevel));
// Max UV is half a mip texel inside the viewport.
KernelScope.GatherInputMaxUV = (GatherInputViewportSize.xy - 0.5 * MipTexelSize) * GatherInputSize.zw;
KernelScope.GatherInputMipLevel = MipLevel;
KernelScope.IntersectionMultiplier = rcp(MipTexelSize);
// When there is enough samples, the sample count measurement and the sample position measurement are same.
if (static_condition(KernelScope.bKenrelHasEnoughSamples))
{
KernelScope.SampleCountToSamplePosition = 1;
}
// Gives hints to the compiler that these var can be shifted to SGPR.
#if CONFIG_SGPR_HINTS
{
KernelScope.SampleCountToSamplePosition = ToScalarMemory(KernelScope.SampleCountToSamplePosition);
KernelScope.GatherInputMaxUV = ToScalarMemory(KernelScope.GatherInputMaxUV);
KernelScope.GatherInputMipLevel = ToScalarMemory(KernelScope.GatherInputMipLevel);
KernelScope.IntersectionMultiplier = ToScalarMemory(KernelScope.IntersectionMultiplier);
}
#endif
}
// Applies the approximate Petzval bokeh lens effect to a sample position offset.
// A more precise version would calculate the transform per sample, rather than per kernel.
// This version assumes the bokeh that are gathered are all centered on the kernel center, rather than on the
// sample center. This is incorrect and can cause bokeh to stretch into a 'banana shape', rather than oval.
// But it's significantly faster and usually not noticeable (more prominent bokeh are scattered anyway).
//	KernelScope: provides the per kernel PetzvalTransform.
//	SampleUV: sample position offset to transform.
// Returns SampleUV unchanged when CONFIG_PETZVAL is off or when Petzval == 0.
float2 ApplyApproxPetzval(in FPrivateKernelSharedScope KernelScope, float2 SampleUV)
{
	#if CONFIG_PETZVAL
		if (Petzval != 0)
		{
			return mul(SampleUV, KernelScope.PetzvalTransform);
		}
	#endif //CONFIG_PETZVAL

	return SampleUV;
}
// Transforms, at compile time, a 2 dimensional constant of a batch into the constant of one of the batch's sample
// pairs, by using rotation invariance.
//	BatchSampleId: index of the sample pair within the batch.
//	BatchConst: constant of the batch, which is also the constant of the batch's sample pair 0.
// Sample pair 1 is sample pair 0 rotated by a quarter turn: (x, y) -> (-y, x).
//
//        Y
//        ^
//   1    |
//        |    0
//  - - - O - - - > X
float2 SampleConstFromBatchConst(const uint BatchSampleId, float2 BatchConst)
{
	float2 SampleConst = BatchConst;
	if (BatchSampleId == 1)
	{
		SampleConst = float2(-BatchConst.y, BatchConst.x);
	}
	return SampleConst;
}
// Gathers all the samples of a ring, and feeds them to the accumulators as pairs of mirrored samples.
// Unlike GatherRing(), this does not call StartRingSamples() / EndRingSamples(): this is up to the caller.
//	GatherParameters: parameters of the gathering.
//	KernelScope: scope of the kernel, as set up by UpdateRuntimeKernelScope().
//	RingId: index of the ring to gather; the ring is at RingId + 1 sample counts from the kernel center.
//	bIsFirstRing: not read by this function.
//	Accumulator: receives every pair of mirrored samples through AccumulateMirrorSamples().
//	FastAccumulator: receives the sum of the colors of the fetched samples.
void GatherRingSamples(
in FGatherInputParameters GatherParameters,
in FPrivateKernelSharedScope KernelScope,
in uint RingId,
in bool bIsFirstRing,
inout FGatherAccumulator Accumulator,
inout FFastAccumulatorOutput FastAccumulator)
{
// Number of sample iteration for this ring.
const uint RingSamplePairCount = GetRingSamplingPairCount(GatherParameters.KernelSamplingDensityMode, RingId);
// Number of sample pair to process per batch: 2 when the ring's pair count is even, 1 otherwise.
// The second pair of a batch is the first one rotated by a quarter turn, see SampleConstFromBatchConst().
// TODO: Could potentially do 4 using symmetries? Might be impracticable because of VGPR pressure.
const uint SamplePairBatchSize = (RingSamplePairCount % 2) ? 1 : 2;
// Number of batch to process.
const uint BatchCount = RingSamplePairCount / SamplePairBatchSize;
// Distance of the ring from the center of the kernel in sample count.
const uint RingDistance = uint(RingId + 1);
// NOTE(review): ToScalarMemory() is used here without the CONFIG_SGPR_HINTS guard used everywhere else - confirm
// this is intended.
float RingRadius = ToScalarMemory(RingDistance * KernelScope.SampleCountToSamplePosition);
#if CONFIG_SGPR_HINTS
{
// GatherParameters is a local copy here, and KernelRadius is not read again in this function.
GatherParameters.KernelRadius = ToScalarMemory(GatherParameters.KernelRadius);
}
#endif
// Generate at compile time sample rotation matrix.
const float2x2 SampleRotationMatrix = GetSampleRotationMatrix(RingSamplePairCount);
// Generates at compile time first sample location on circle (radius = 1).
const float2 FirstCircleUnitPos = GetDiskSampleOnUnitCirle(RingId, RingSamplePairCount, /* RingSampleId = */ 0);
// Position of the first sample on circle with radius according to KernelRadius.
float2 FirstCircleSamplePosOffset = FirstCircleUnitPos * RingRadius;
// Setup iteratable SGPR
float2 CurrentCircleUnitPos = FirstCircleUnitPos;
float2 CurrentCircleSamplePosOffset = FirstCircleSamplePosOffset;
#if CONFIG_SGPR_HINTS
{
CurrentCircleUnitPos = ToScalarMemory(CurrentCircleUnitPos);
CurrentCircleSamplePosOffset = ToScalarMemory(CurrentCircleSamplePosOffset);
}
#endif
// Iterate over the batch of samples.
for (uint BatchId = 0; BatchId < BatchCount; BatchId++)
#if defined(WORKARROUND_UE_58394)
WORKARROUND_UE_58394 // TODO: delete once original issue is fixed.
#endif
{
// Rotate the samples position along the ring. This is done before the positions are used, so the first batch
// is already one step away from the first sample location computed above.
CurrentCircleUnitPos = mul(CurrentCircleUnitPos, SampleRotationMatrix);
CurrentCircleSamplePosOffset = mul(CurrentCircleSamplePosOffset, SampleRotationMatrix);
#if CONFIG_SGPR_HINTS
{
CurrentCircleUnitPos = ToScalarMemory(CurrentCircleUnitPos);
CurrentCircleSamplePosOffset = ToScalarMemory(CurrentCircleSamplePosOffset);
}
#endif
float2 BatchCirleUnitPos = CurrentCircleUnitPos;
float2 BatchCircleSamplePosOffset = CurrentCircleSamplePosOffset;
// Generates at compile time sample location if square shaped ring in sample count distance from the kernel's center.
//
// RingId = 0:
// Y
// |
// 3 2 1
// 4 0 -X
//
// RingId = 1:
// Y
// |
// 6 5 4 3 2
// 7 1
// 8 0 -X
//
// ...
float2 BatchSquarePos;
if (BatchId < RingDistance)
{
// Right edge of the square, going up.
BatchSquarePos.x = RingDistance;
BatchSquarePos.y = BatchId;
}
else if (BatchId < RingDistance * 3)
{
// Top edge of the square, going left.
BatchSquarePos.x = float(RingDistance) - float(BatchId - RingDistance);
BatchSquarePos.y = RingDistance;
}
else
{
// Left edge of the square, going down. With the pair counts returned by GetRingSamplingPairCount(),
// BatchCount never exceeds RingDistance * 3, so this branch is not expected to be taken.
BatchSquarePos.x = -float(RingDistance);
BatchSquarePos.y = RingDistance - (BatchId - RingDistance * 3);
}
// Iterate over the pair of sample of the batch to share vgpr setup and rotating ALU
for (uint BatchSampleId = 0; BatchSampleId < SamplePairBatchSize; BatchSampleId++)
{
// Swizzle the batch positions' X and Y components.
float2 CirleUnitPos = SampleConstFromBatchConst(BatchSampleId, BatchCirleUnitPos);
const float2 SquarePos = SampleConstFromBatchConst(BatchSampleId, BatchSquarePos);
float2 CircleSamplePosOffset = SampleConstFromBatchConst(BatchSampleId, BatchCircleSamplePosOffset);
// Position offsets of the two mirrored samples of the pair, and the distance they are considered to be at.
float2 SamplePosOffset[2];
float SampleDistance;
#if DIM_BOKEH_SIMULATION != BOKEH_SIMULATION_DISABLED && CONFIG_KERNEL_LAYOUT == KERNEL_LAYOUT_SCALABLE_DISK
{
// The sample positions are fetched from the bokeh LUT, indexed with the square position.
const float2 LookUpUV[2] = {
0.5 + (0.5 + CONFIG_POINT_MIRROR_BOKEH * SquarePos) / float(BOKEH_LUT_SIZE),
0.5 + (0.5 - CONFIG_POINT_MIRROR_BOKEH * SquarePos) / float(BOKEH_LUT_SIZE),
};
SamplePosOffset[0] = KernelScope.SampleCountToSamplePosition * BokehLUT.SampleLevel(GlobalBilinearWrappedSampler, LookUpUV[0], 0).xy;
// The symmetric shortcut below is disabled by the "&& 0".
#if DIM_BOKEH_SIMULATION == BOKEH_SIMULATION_SIMMETRIC && 0
SamplePosOffset[1] = -SamplePosOffset[0];
#else
SamplePosOffset[1] = KernelScope.SampleCountToSamplePosition * BokehLUT.SampleLevel(
GlobalBilinearWrappedSampler, LookUpUV[1], 0).xy;
#endif
SampleDistance = RingRadius;
}
#elif CONFIG_KERNEL_LAYOUT == KERNEL_LAYOUT_ALIGNED_SQUARE
{
// Do square shaped ring.
SamplePosOffset[0] = SquarePos * KernelScope.SampleCountToSamplePosition;
// Remove this sample at compile time if we already know there is no chance having a valid intersection.
if (static_condition(KernelScope.bKenrelHasEnoughSamples && (length(SquarePos) > GatherParameters.MaxRingCount + 0.5)))
{
// Skips the whole pair: nothing is fetched nor accumulated for it.
continue;
}
SamplePosOffset[1] = -SamplePosOffset[0];
SampleDistance = length(SamplePosOffset[0] * float2(CocSqueeze, 1.0));
}
#elif CONFIG_KERNEL_LAYOUT == KERNEL_LAYOUT_SCALABLE_DISK
{
// Always keep a disk shaped kernel.
SamplePosOffset[0] = CircleSamplePosOffset;
SamplePosOffset[0].x *= CocInvSqueeze;
SamplePosOffset[1] = -SamplePosOffset[0];
SampleDistance = RingRadius;
}
#else
#error Unknown gathering kernel layout.
#endif
// Fetches the mirrored samples.
FGatherSample Sample[2];
UNROLL
for (uint k = 0; k < 2; k++)
{
// +1 for the first sample of the pair, -1 for its mirror.
const float SampleSign = (k == 0) ? 1.0 : -1.0;
SamplePosOffset[k] = ApplyApproxPetzval(KernelScope, SamplePosOffset[k]);
// Compute sample's input buffer UV.
float2 SampleUV = KernelScope.KernelCenterUV + SamplePosOffset[k] * GatherInputSize.zw;
// Sample input buffer.
Sample[k] = FetchGatherSample(KernelScope, SampleUV);
#if DIM_BOKEH_SIMULATION != BOKEH_SIMULATION_DISABLED && CONFIG_KERNEL_LAYOUT == KERNEL_LAYOUT_ALIGNED_SQUARE
{
// Scale the sample's Coc radius by the factor the bokeh LUT stores for the direction of the sample.
const float LookUpRadius = 0.5 - 1.0 / float(BOKEH_LUT_SIZE);
#if DIM_BOKEH_SIMULATION == BOKEH_SIMULATION_SIMMETRIC
const float2 LookUpUV = LookUpRadius * CirleUnitPos;
#elif DIM_BOKEH_SIMULATION == BOKEH_SIMULATION_GENERAL
const float2 LookUpUV = (SampleSign * LookUpRadius) * CirleUnitPos;
#else
#error Unknown bokeh simulation.
#endif
float4 LookupSample = BokehLUT.SampleLevel(GlobalBilinearWrappedSampler, LookUpUV, 0);
float CocRadiusToBokehEdgeDistance = LookupSample.x;
Sample[k].CocRadius *= CocRadiusToBokehEdgeDistance;
}
#endif
Sample[k].Distance = SampleDistance;
Sample[k].Intersection = ComputeSampleIntersection(KernelScope, Sample[k].CocRadius, SampleDistance);
// The fast accumulator sums the color of every fetched sample, regardless of its intersection.
FastAccumulator.Color += Sample[k].Color;
}
// Hand the mirrored samples to accumulator.
AccumulateMirrorSamples(Accumulator, Sample[0], Sample[1]);
} // for (uint BatchSampleId = 0; BatchSampleId < SamplePairBatchSize; BatchSampleId++)
} // for (uint BatchId = 0; BatchId < BatchCount; BatchId++)
}
// Gathers a whole ring into the accumulators: describes the ring to the accumulator, opens it with
// StartRingSamples(), gathers its samples with GatherRingSamples(), then closes it with EndRingSamples().
//	GatherParameters: parameters of the gathering.
//	KernelScope: scope of the kernel, as set up by UpdateRuntimeKernelScope().
//	RingId: index of the ring to gather.
//	bIsFirstRing: forwarded to the accumulator through FGatherRingInfos.
//	Accumulator, FastAccumulator: accumulators receiving the samples of the ring.
void GatherRing(
	in FGatherInputParameters GatherParameters,
	in FPrivateKernelSharedScope KernelScope,
	in uint RingId,
	in bool bIsFirstRing,
	inout FGatherAccumulator Accumulator,
	inout FFastAccumulatorOutput FastAccumulator)
{
	// Number of sample pairs of the ring, each pair being two mirrored samples.
	const uint PairCount = GetRingSamplingPairCount(GatherParameters.KernelSamplingDensityMode, RingId);

	// Distance of the ring from the center of the kernel, in sample count.
	const uint DistanceInSamples = RingId + 1;

	// Describe the ring to the accumulator.
	FGatherRingInfos RingInfos;
	RingInfos.RingId = RingId;
	RingInfos.bIsFirstRing = bIsFirstRing;
	RingInfos.Radius = DistanceInSamples * KernelScope.SampleCountToSamplePosition;
	RingInfos.SampleCount = PairCount * 2;
	RingInfos.Weight = RingInfos.SampleCount * ComputeSampleWeight(RingInfos.Radius);

	// Gives hints to the compiler that the radius can be shifted to SGPR.
	#if CONFIG_SGPR_HINTS
	{
		RingInfos.Radius = ToScalarMemory(RingInfos.Radius);
	}
	#endif

	StartRingSamples(Accumulator, RingInfos);

	GatherRingSamples(
		GatherParameters,
		KernelScope,
		RingId,
		bIsFirstRing,
		/* inout */ Accumulator,
		/* inout */ FastAccumulator);

	EndRingSamples(Accumulator, RingInfos);
} // GatherRing()
// Gathers the samples of the whole kernel with a preconfigured accumulator, and resolves its output.
// The order in which the rings are gathered, and whether the kernel radius changes along the way, depend on
// GatherParameters.KernelSamplingDensityMode.
//	GatherParameters: parameters of the gathering. This is a local copy: KernelRadius gets modified by the density modes
//		that change the sampling density.
//	Accumulator: preconfigured accumulator. Also a local copy; its result comes out through AccumulatorOutput.
//	AccumulatorOutput: output of ResolveAccumulatorOutputs().
//	FastAccumulator: sum of the colors of the gathered samples, divided by GatherResolveInfos.SamplesCount.
// Returns the number of sampling density changes done, which is only counted for KERNEL_DENSITY_HIGHER_IN_CENTER_DISK.
uint GatherSamplesAndResolve(
in FGatherInputParameters GatherParameters,
in FGatherAccumulator Accumulator,
out FAccumulatorOutput AccumulatorOutput,
out FFastAccumulatorOutput FastAccumulator)
{
uint DebugGatherDensityChanges = 0;
// DEBUG: force a low number of rings.
#if DEBUG_FORCE_LOW_RING_COUNT
GatherParameters.MaxRingCount = 2;
#endif
FPrivateKernelSharedScope KernelScope;
// Set up initial compile time stuff in the scope of the kernel.
{
KernelScope.bKenrelHasEnoughSamples = DIM_LAYER_PROCESSING == LAYER_PROCESSING_SLIGHT_OUT_OF_FOCUS;
// Bilinear sampling is only used by fast gathering, when the kernel does not have enough samples.
KernelScope.bBilinearSampleInput = (
static_condition(GatherParameters.bFastGathering) &&
!KernelScope.bKenrelHasEnoughSamples);
}
// Set up initial run time stuff in the scope of the kernel.
UpdateRuntimeKernelScope(GatherParameters, /* inout */ KernelScope);
// Initialize the fast accumulator.
FastAccumulator.Color = 0;
// Smallest ring first is only handled on KERNEL_DENSITY_CONSTANT;
const bool bLargestRingFirst = LARGEST_RINGS_FIRST || GatherParameters.KernelSamplingDensityMode != KERNEL_DENSITY_CONSTANT;
// Feed the center sample to the accumulator right away if smallest rings first.
if (!bLargestRingFirst)
{
FGatherSample CenterSample = FetchGatherSample(KernelScope, KernelScope.KernelCenterUV);
AccumulateCenterSampleAsItsOwnRing(Accumulator, CenterSample);
FastAccumulator.Color += CenterSample.Color;
}
// Boolean to track the first ring at compile time.
// NOTE(review): this follows LARGEST_RINGS_FIRST rather than bLargestRingFirst. When LARGEST_RINGS_FIRST == 0 with
// a density mode other than KERNEL_DENSITY_CONSTANT, no center sample has been accumulated yet, but the first
// gathered ring of the branches reading this boolean is still flagged as not being the first - confirm this is intended.
bool bIsFirstRing = LARGEST_RINGS_FIRST != 0;
FGatherResolveInfos GatherResolveInfos;
// Computes compile time known values about radius changes
const float LargeKernelRadius = 1 + 2 * GatherParameters.MaxRingCount;
const float SmallKernelRadius = 1 + 2 * (GatherParameters.MaxRingCount - GatherParameters.DensityChangeRingCount);
const float RadiusDownScaleFactor = SmallKernelRadius / LargeKernelRadius;
// TODO: reduce number of iteration as KernelRadius is getting lower than MaxCircleRingCount.
// TODO: reduce density of at the center of kernel for fast accumulator, to reduce load on texture unit.
if (static_condition(GatherParameters.KernelSamplingDensityMode == KERNEL_DENSITY_CONSTANT) ||
static_condition(GatherParameters.KernelSamplingDensityMode == KERNEL_DENSITY_CONSTANT_HEXAWEB))
{
// Constant density: gather every ring once, in the order selected by LARGEST_RINGS_FIRST.
#if LARGEST_RINGS_FIRST
for (int RingId = GatherParameters.MaxRingCount - 1; RingId >= 0; RingId--)
#else
for (int RingId = 0; RingId < int(GatherParameters.MaxRingCount); RingId++)
#endif
{
GatherRing(
GatherParameters,
KernelScope,
uint(RingId),
bIsFirstRing,
/* inout */ Accumulator,
/* inout */ FastAccumulator);
bIsFirstRing = false;
} // for (int RingId)
GatherResolveInfos.SamplesCount = GetKernelSampleCount(GatherParameters.KernelSamplingDensityMode, GatherParameters.MaxRingCount);
}
else if (static_condition(GatherParameters.KernelSamplingDensityMode == KERNEL_DENSITY_CONSTANT_DYNAMIC_RING_COUNT))
{
// Same gathering as the constant density branch above; only the type of the loop counter differs when smallest
// rings are gathered first.
#if LARGEST_RINGS_FIRST
for (int RingId = GatherParameters.MaxRingCount - 1; RingId >= 0; RingId--)
#else
for (uint RingId = 0; RingId < uint(GatherParameters.MaxRingCount); RingId++)
#endif
{
GatherRing(
GatherParameters,
KernelScope,
uint(RingId),
bIsFirstRing,
/* inout */ Accumulator,
/* inout */ FastAccumulator);
bIsFirstRing = false;
} // for (int RingId)
GatherResolveInfos.SamplesCount = GetKernelSampleCount(GatherParameters.KernelSamplingDensityMode, GatherParameters.MaxRingCount);
}
else if (static_condition(GatherParameters.KernelSamplingDensityMode == KERNEL_DENSITY_HIGHER_IN_CENTER_DISK))
{
//static_assert(GatherParameters.MaxRingCount >= 2, "Assumes this LARGEST_RINGS_FIRST.");
// Rings with an id >= LowerRingId are the outer rings, which get gathered again after every density change.
int LowerRingId = GatherParameters.MaxRingCount - GatherParameters.DensityChangeRingCount;
int RingId;
// Gather the outer rings at the initial kernel radius.
UNROLL_N(5)
for (RingId = GatherParameters.MaxRingCount - 1; RingId >= LowerRingId; RingId--)
{
GatherRing(
GatherParameters,
KernelScope,
uint(RingId),
bIsFirstRing,
/* inout */ Accumulator,
/* inout */ FastAccumulator);
bIsFirstRing = false;
} // for (int RingId)
// Shrink the kernel and gather the outer rings again, as many times as allowed and worthwhile.
LOOP
for (uint SamplingDensityChangeId = 0; SamplingDensityChangeId < GatherParameters.MaxDensityChangeCount; SamplingDensityChangeId++)
{
float MipTexelSize = pow(2.0, uint(KernelScope.GatherInputMipLevel));
const float MinConvolutionRadius = GatherParameters.MaxRingCount;
const float SmallestConvolutionRadius = GatherParameters.KernelRadius * RadiusDownScaleFactor + MipTexelSize;
// Only change density if the shrunk kernel stays larger than the min convolution radius, and if there are Coc
// radii smaller than what the shrunk kernel would cover.
bool bChangeDensity = (
GatherParameters.KernelRadius * RadiusDownScaleFactor > MinConvolutionRadius &&
GatherParameters.MinIntersectableCocRadiusAbs < SmallestConvolutionRadius);
BRANCH
if (bChangeDensity)
{
// Decrease kernel size, and changes the already accumulated weight.
{
GatherParameters.KernelRadius *= RadiusDownScaleFactor;
// Gives hints to the compiler that these var can be shifted to SGPR.
#if CONFIG_SGPR_HINTS
{
GatherParameters.KernelRadius = ToScalarMemory(GatherParameters.KernelRadius);
}
#endif
// The weight multiplier is the inverse of the square of the radius down scale factor.
const float AccumulatedWeightMultiplier = rcp(RadiusDownScaleFactor * RadiusDownScaleFactor);
AmendAccumulatedRingsWeight(/* inout */ Accumulator, GatherParameters, AccumulatedWeightMultiplier);
AmendAccumulatedRingsWeight(/* inout */ FastAccumulator, AccumulatedWeightMultiplier);
UpdateRuntimeKernelScope(GatherParameters, /* inout */ KernelScope);
}
// Gather the outer rings again, at the new kernel radius.
UNROLL_N(5)
for (int RingId2 = GatherParameters.MaxRingCount - 1; RingId2 >= LowerRingId; RingId2--)
{
GatherRing(
GatherParameters,
KernelScope,
uint(RingId2),
/* bIsFirstRing = */ false,
/* inout */ Accumulator,
/* inout */ FastAccumulator);
} // for (int RingId)
DebugGatherDensityChanges++;
}
else
{
break;
}
}
// Gather the remaining inner rings; RingId continues from where the first loop of this branch stopped.
UNROLL_N(5)
for (; RingId >= 0; RingId--)
{
GatherRing(
GatherParameters,
KernelScope,
uint(RingId),
/* bIsFirstRing = */ false,
/* inout */ Accumulator,
/* inout */ FastAccumulator);
} // for (int RingId)
// TODO: this is not right, but that is fine since the only accumulator that uses that is not using this kernel density.
GatherResolveInfos.SamplesCount = GetKernelSampleCount(GatherParameters.KernelSamplingDensityMode, GatherParameters.MaxRingCount);
}
else if (static_condition(GatherParameters.KernelSamplingDensityMode == KERNEL_DENSITY_LOWER_IN_CENTER_HEXAWEB))
{
int RingId = GatherParameters.MaxRingCount - 1;
// Gather outer ring first.
GatherRing(
GatherParameters,
KernelScope,
uint(RingId),
/* bIsFirstRing = */ true,
/* inout */ Accumulator,
/* inout */ FastAccumulator);
// Changes the sampling density.
// TODO: probably a bug in here since bokeh are not entirely great.
{
// Compute the number of ring remaining.
const int RemainingRingCount = GatherParameters.MaxRingCount / 2;
const float RadiusUpScaleFactor = float(1 + 2 * (GatherParameters.MaxRingCount - 1 )) / float(1 + 2 * RemainingRingCount);
const float AccumulatedWeightMultiplier = rcp(RadiusUpScaleFactor * RadiusUpScaleFactor);
// Increase the kernel radius, and changes the already accumulated weight.
GatherParameters.KernelRadius *= RadiusUpScaleFactor;
AmendAccumulatedRingsWeight(/* inout */ Accumulator, GatherParameters, AccumulatedWeightMultiplier);
AmendAccumulatedRingsWeight(/* inout */ FastAccumulator, AccumulatedWeightMultiplier);
// TODO: looks like there is an increase in VGPR pressure in here. Drill at why.
UpdateRuntimeKernelScope(GatherParameters, /* inout */ KernelScope);
// Samples of the outer ring, scaled by the weight multiplier, plus the samples of the remaining rings.
GatherResolveInfos.SamplesCount = (
2 * GetRingSamplingPairCount(GatherParameters.KernelSamplingDensityMode, uint(RingId)) * AccumulatedWeightMultiplier +
GetKernelSampleCount(GatherParameters.KernelSamplingDensityMode, RemainingRingCount));
RingId = RemainingRingCount - 1;
}
// Gather the remaining rings at the lower density.
UNROLL_N(5)
for (; RingId >= 0; RingId--)
{
GatherRing(
GatherParameters,
KernelScope,
uint(RingId),
/* bIsFirstRing = */ false,
/* inout */ Accumulator,
/* inout */ FastAccumulator);
} // for (int RingId)
}
#if CONFIG_LAYER_GATHERING && GATHER_ACCUMULATOR_METHOD == GATHER_ACCUMULATOR_METHOD_RINGBINNING
else if (static_condition(GatherParameters.KernelSamplingDensityMode == KERNEL_DENSITY_RECURSIVE_DISKS))
{
//static_assert(GatherParameters.MaxRingCount >= 2, "Assumes this LARGEST_RINGS_FIRST.");
// Set up the bordering radii of the accumulator.
{
// Error in coc radius error introduced by the under sampling of the kernel
const float CocRadiusError = 0.0;
Accumulator.CloseBorderingRadius = ((GatherParameters.RingCount - GatherParameters.DensityChangeRingCount) + (0.5 + CocRadiusError)) * (Accumulator.GatherParameters.KernelRadius * rcp(0.5 + Accumulator.GatherParameters.RingCount));
Accumulator.FarBorderingRadius = Accumulator.CloseBorderingRadius/ RadiusDownScaleFactor;
#if CONFIG_SGPR_HINTS
{
Accumulator.FarBorderingRadius = ToScalarMemory(Accumulator.FarBorderingRadius);
Accumulator.CloseBorderingRadius = ToScalarMemory(Accumulator.CloseBorderingRadius);
}
#endif
}
// Gather very first ring.
{
const uint RingId = GatherParameters.MaxRingCount - 1;
GatherRing(
GatherParameters,
KernelScope,
RingId,
/* bIsFirstRing = */ true,
/* inout */ Accumulator,
/* inout */ FastAccumulator);
}
for (uint SamplingDensityChangeId = 0; SamplingDensityChangeId < GatherParameters.MaxDensityChangeCount; SamplingDensityChangeId++)
{
// Gather all remaining rings as single cluster of sample to the accumulator.
{
const uint RemainingRingCount = GatherParameters.MaxRingCount - 1;
// Id of the second largest ring, which is the outermost ring of the cluster.
const uint SecondRingId = GatherParameters.MaxRingCount - 2;
// Number of samples of the cluster, center sample included.
const uint DiskSampleCount = GetKernelSampleCount(GatherParameters.KernelSamplingDensityMode, RemainingRingCount);
// Distance of the ring from the center of the kernel in sample count.
const uint RingDistance = uint(SecondRingId + 1);
FGatherRingInfos RingInfos;
RingInfos.RingId = SecondRingId;
RingInfos.bIsFirstRing = false;
RingInfos.Radius = RingDistance * KernelScope.SampleCountToSamplePosition;
RingInfos.SampleCount = DiskSampleCount;
RingInfos.Weight = DiskSampleCount * ComputeSampleWeight(RingInfos.Radius);
// Start accumulating ring samples
StartRingSamples(Accumulator, RingInfos);
for (uint RingId = 0; RingId < RemainingRingCount; RingId++)
{
GatherRingSamples(
GatherParameters,
KernelScope,
RingId,
/* bIsFirstRing = */ false,
/* inout */ Accumulator,
/* inout */ FastAccumulator);
}
// Feed the center sample to the accumulator. Its color is not added to the fast accumulator here.
{
FGatherSample CenterSample = FetchGatherSample(KernelScope, KernelScope.KernelCenterUV);
AccumulateCenterSample(Accumulator, CenterSample);
}
// End accumulating ring samples.
EndRingSamples(Accumulator, RingInfos);
}
// Change density or stop.
{
float MipTexelSize = pow(2.0, uint(KernelScope.GatherInputMipLevel));
const float MinConvolutionRadius = GatherParameters.MaxRingCount;
const float SmallestConvolutionRadius = GatherParameters.KernelRadius * RadiusDownScaleFactor + MipTexelSize;
bool bChangeDensity = (
GatherParameters.KernelRadius * RadiusDownScaleFactor > MinConvolutionRadius &&
GatherParameters.MinIntersectableCocRadiusAbs < SmallestConvolutionRadius);
BRANCH
if (bChangeDensity)
{
// Decrease kernel size.
GatherParameters.KernelRadius *= RadiusDownScaleFactor;
// The previous close bordering radius becomes the far one of the smaller disk.
Accumulator.FarBorderingRadius = Accumulator.CloseBorderingRadius;
Accumulator.CloseBorderingRadius = Accumulator.FarBorderingRadius * RadiusDownScaleFactor;
// Gives hints to the compiler that these var can be shifted to SGPR.
#if CONFIG_SGPR_HINTS
{
GatherParameters.KernelRadius = ToScalarMemory(GatherParameters.KernelRadius);
Accumulator.FarBorderingRadius = ToScalarMemory(Accumulator.FarBorderingRadius);
Accumulator.CloseBorderingRadius = ToScalarMemory(Accumulator.CloseBorderingRadius);
}
#endif
// Only the main accumulator is amended here; the fast accumulator is left as is.
const float AccumulatedWeightMultiplier = rcp(RadiusDownScaleFactor * RadiusDownScaleFactor);
AmendAccumulatedRingsWeight(/* inout */ Accumulator, GatherParameters, AccumulatedWeightMultiplier);
UpdateRuntimeKernelScope(GatherParameters, /* inout */ KernelScope);
}
else
{
break;
}
}
// Gather largest ring of this smaller ring.
{
const uint RingId = GatherParameters.MaxRingCount - 1;
GatherRing(
GatherParameters,
KernelScope,
RingId,
/* bIsFirstRing = */ false,
/* inout */ Accumulator,
/* inout */ FastAccumulator);
}
} // for (uint SamplingDensityChangeId = 0; SamplingDensityChangeId < GatherParameters.MaxDensityChangeCount; SamplingDensityChangeId++)
// TODO: this is not right, but that is fine since the only accumulator that uses that is not using this kernel density.
GatherResolveInfos.SamplesCount = GetKernelSampleCount(GatherParameters.KernelSamplingDensityMode, GatherParameters.MaxRingCount);
}
#endif // CONFIG_LAYER_GATHERING && GATHER_ACCUMULATOR_METHOD == GATHER_ACCUMULATOR_METHOD_RINGBINNING
// Feed the center sample to the accumulator at the end if largest rings first.
// KERNEL_DENSITY_RECURSIVE_DISKS is excluded, since it feeds the center sample as part of its cluster of rings.
if (static_condition(bLargestRingFirst && GatherParameters.KernelSamplingDensityMode != KERNEL_DENSITY_RECURSIVE_DISKS))
{
FGatherSample CenterSample = FetchGatherSample(KernelScope, KernelScope.KernelCenterUV);
AccumulateCenterSampleAsItsOwnRing(Accumulator, CenterSample);
FastAccumulator.Color += CenterSample.Color;
}
// Resolve up fast accumulator.
{
// Normalise scene color to be consistent with ResolveForegroundAndBackgroundOutputs.
FastAccumulator.Color *= rcp(GatherResolveInfos.SamplesCount);
}
// Resolves the accumulator.
ResolveAccumulatorOutputs(Accumulator, GatherResolveInfos, /* out */ AccumulatorOutput);
return DebugGatherDensityChanges;
}