1201 lines
38 KiB
HLSL
1201 lines
38 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
DiaphragmDOF/DOFRecombine.usf: Recombine lower res convolution with full
|
|
res scene color.
|
|
=============================================================================*/
|
|
|
|
|
|
#include "DOFCommon.ush"
|
|
#include "../SceneTexturesCommon.ush"
|
|
#include "../Random.ush"
|
|
#include "../MonteCarlo.ush"
|
|
#include "../SobolRandom.ush"
|
|
|
|
|
|
|
|
//------------------------------------------------------- DEBUG COMPILE TIME CONFIG
|
|
|
|
// When on, color the output pixels according to how expensive they were.
|
|
#define DEBUG_FAST_PATHS 0
|
|
|
|
|
|
//------------------------------------------------------- ENUM VALUES
|
|
|
|
/** Slight out of focus gathering method. */
|
|
// No slight out of focus.
|
|
#define SLIGHT_FOCUS_METHOD_DISABLED 0
|
|
|
|
// Accumulate foreground and background slight out of focus in a single (unique) convolution.
|
|
#define SLIGHT_FOCUS_METHOD_UNIQUE_CONVOLUTIONS 1
|
|
|
|
// Accumulate foreground and background slight out of focus separately.
|
|
#define SLIGHT_FOCUS_METHOD_SEPARATE_CONVOLUTIONS 2
|
|
|
|
|
|
/** Method used to analyze the full resolution neighborhood. */
|
|
// Uses integer atomic
|
|
#define NEIGHBORHOOD_ANALISIS_ATOMIC 0
|
|
|
|
// Uses wave instruction.
|
|
#define NEIGHBORHOOD_ANALISIS_WAVE 1
|
|
|
|
// Uses LDS reduction.
|
|
#define NEIGHBORHOOD_ANALISIS_LDS_REDUCE 2
|
|
|
|
|
|
// Compositing method for the background.
|
|
// TODO: shader permutation to scale down.
|
|
#define COMPOSITING_METHOD_NONE 0
|
|
#define COMPOSITING_METHOD_BILINEAR_BKG 1
|
|
|
|
|
|
//------------------------------------------------------- COMPILE TIME CONFIG
|
|
|
|
// Selects the background compositing method from the layer processing permutation.
#if DIM_LAYER_PROCESSING == LAYER_PROCESSING_FOREGROUND_ONLY
	// Foreground only: there is no background to composite.
	#define CONFIG_COMPOSITING_METHOD (COMPOSITING_METHOD_NONE)

#elif DIM_LAYER_PROCESSING == LAYER_PROCESSING_BACKGROUND_ONLY || DIM_LAYER_PROCESSING == LAYER_PROCESSING_COMBINED
	// Both permutations that include the background use the same compositing method.
	#define CONFIG_COMPOSITING_METHOD (COMPOSITING_METHOD_BILINEAR_BKG)

#else
	#error Unknown layer processing.
#endif
|
|
|
|
|
|
// Per quality permutation configuration of the full resolution gathering.
#if DIM_QUALITY == 0
	// No sample pairs and slight out of focus gathering disabled.
	#define CONFIG_GATHER_PAIR_COUNT 0
	#define CONFIG_SLIGHT_FOCUS_METHOD (SLIGHT_FOCUS_METHOD_DISABLED)
	#define CONFIG_HOLE_FILLING_METHOD (HOLE_FILLING_METHOD_OPACITY_AMEND)

#elif DIM_QUALITY == 1 || DIM_QUALITY == 2
	// Qualities 1 and 2 only differ by the maximum number of gathered sample pairs.
	#if DIM_QUALITY == 1
		#define CONFIG_GATHER_PAIR_COUNT 12
	#else
		#define CONFIG_GATHER_PAIR_COUNT 16
	#endif

	#define CONFIG_SLIGHT_FOCUS_METHOD (SLIGHT_FOCUS_METHOD_UNIQUE_CONVOLUTIONS)
	#define CONFIG_HOLE_FILLING_METHOD (HOLE_FILLING_METHOD_SEPARATE_GATHER)

	// The Coc radius is read from scene color's alpha unless alpha is needed by DOF itself.
	#define CONFIG_FETCH_FULLRES_COC_FROM_ALPHA (!CONFIG_DOF_ALPHA)

#else
	#error Unknown quality.
#endif
|
|
|
|
|
|
// Configures the neighborhood analysis method to use for slight out of focus early out.
|
|
#if COMPILER_SUPPORTS_WAVE_MINMAX && (PS4_PROFILE || XBOXONE_PROFILE)
|
|
// GCN only optimisation
|
|
#define NEIGHBORHOOD_ANALISIS_METHOD (NEIGHBORHOOD_ANALISIS_WAVE)
|
|
|
|
#elif COMPILER_HLSLCC
|
|
// Compiler does not like NEIGHBORHOOD_ANALISIS_ATOMIC at all.
|
|
#define NEIGHBORHOOD_ANALISIS_METHOD (NEIGHBORHOOD_ANALISIS_LDS_REDUCE)
|
|
|
|
#else
|
|
#define NEIGHBORHOOD_ANALISIS_METHOD (NEIGHBORHOOD_ANALISIS_ATOMIC)
|
|
|
|
#endif
|
|
|
|
// Clamp full res gathering to have spec hits still temporally stable.
|
|
#define CONFIG_CLAMP_FULLRES_GATHER 1
|
|
|
|
// Clamp scene buffer UVs
|
|
#define CONFIG_CLAMP_SCENE_BUFFER_UV 1
|
|
|
|
// Clamp DOF buffer UVs
|
|
#define CONFIG_CLAMP_DOF_BUFFER_UV 1
|
|
|
|
// Fetch CoC radius from full res scene color's alpha channel.
|
|
#ifndef CONFIG_FETCH_FULLRES_COC_FROM_ALPHA
|
|
#define CONFIG_FETCH_FULLRES_COC_FROM_ALPHA 0
|
|
#endif
|
|
|
|
|
|
//------------------------------------------------------- COMPILE TIME CONSTS
|
|
|
|
// Epsilon used to compare opacity values.
|
|
#define OPACITY_EPSILON 0.01
|
|
|
|
|
|
#define GROUP_BORDER_SIZE (DEFAULT_GROUP_BORDER_SIZE)
|
|
#define THREADGROUP_TOTALSIZE (GROUP_BORDER_SIZE * GROUP_BORDER_SIZE)
|
|
|
|
|
|
//------------------------------------------------------- PARAMETERS
|
|
|
|
float4 ViewportSize;
|
|
uint4 ViewportRect;
|
|
|
|
FScreenTransform DispatchThreadIdToDOFBufferUV;
|
|
float2 DOFBufferUVMax;
|
|
|
|
float4 SeparateTranslucencyBilinearUVMinMax;
|
|
uint SeparateTranslucencyUpscaling;
|
|
float EncodedCocRadiusToRecombineCocRadius;
|
|
float MaxRecombineAbsCocRadius;
|
|
|
|
Texture2D BokehLUT;
|
|
|
|
Texture2D SceneColorInput;
|
|
Texture2D SceneDepthTexture;
|
|
Texture2D SceneSeparateCoc;
|
|
|
|
Texture2D<float> LowResDepthTexture;
|
|
Texture2D<float> FullResDepthTexture;
|
|
|
|
Texture2D SceneSeparateTranslucency;
|
|
Texture2D SceneSeparateTranslucencyModulateColor;
|
|
|
|
float4 ConvolutionInputSize;
|
|
|
|
Texture2D ForegroundConvolution_SceneColor;
|
|
Texture2D ForegroundConvolution_SeparateAlpha;
|
|
|
|
Texture2D ForegroundHoleFillingConvolution_SceneColor;
|
|
Texture2D ForegroundHoleFillingConvolution_SeparateAlpha;
|
|
|
|
Texture2D SlightOutOfFocusConvolution_SceneColor;
|
|
Texture2D SlightOutOfFocusConvolution_SeparateAlpha;
|
|
|
|
Texture2D BackgroundConvolution_SceneColor;
|
|
Texture2D BackgroundConvolution_SeparateAlpha;
|
|
|
|
|
|
RWTexture2D<float4> SceneColorOutput;
|
|
|
|
// Utilities to upsample translucency to full resolution
|
|
#include "../SeparateTranslucency.ush"
|
|
float2 SeparateTranslucencyTextureLowResExtentInverse;
|
|
|
|
//------------------------------------------------------- INTERMEDIARY STRUCTURES
|
|
|
|
// Per output pixel inputs of the recombine, set up by the entry point and handed to the gathering helpers.
struct FRecombineInputParameters
{
	// Viewport UV of the output pixel.
	float2 ViewportUV;

	// Buffer UV used to sample the scene texture buffers.
	float2 SceneBufferUV;

	// Buffer UV used to sample the DOF buffers.
	float2 DOFBufferUV;

	// Size and inverse size of the DOF input buffers.
	float4 DOFBufferSize;

	// Random seeds used to generate the gathering sample pattern.
	uint2 Seed0;
};
|
|
|
|
|
|
//------------------------------------------------------- FUNCTIONS
|
|
|
|
// Point samples the scene depth texture at BufferUV and converts the device Z it holds with ConvertFromDeviceZ().
float SampleWorldDepth(float2 BufferUV)
{
	const float DeviceZ = SceneDepthTexture.SampleLevel(GlobalPointClampedSampler, BufferUV, 0).x;

	return ConvertFromDeviceZ(DeviceZ);
}
|
|
|
|
// Hash style noise: offsets N by X, then returns the fractional part of a scaled sine (output in 0 to 1).
float NoizNorm(float2 N, float X)
{
	const float2 ShiftedCoord = N + X;
	const float Phase = dot(ShiftedCoord.xy, float2(12.9898, 78.233));

	return frac(sin(Phase) * 43758.5453);
}
|
|
|
|
// Returns the 2D vector of length Radius pointing at angle Radians (polar to cartesian).
float2 RotVec(float Radius, float Radians)
{
	const float2 Direction = float2(cos(Radians), sin(Radians));

	return Radius * Direction;
}
|
|
|
|
// Caps Translucency to MaxTranslucency and rescales Color by the matching change of opacity,
// (1 - new translucency) / (1 - old translucency).
void AmendAdditiveColorWithMaxTranslucency(inout float4 Color, inout float Translucency, float MaxTranslucency)
{
	// Nothing to amend unless Translucency < 1; this also keeps the division below away from (1 - 1) = 0.
	if (!(Translucency < 1))
	{
		return;
	}

	const float CappedTranslucency = min(Translucency, MaxTranslucency);
	const float OpacityRatio = (1 - CappedTranslucency) / (1 - Translucency);

	Color *= OpacityRatio;
	Translucency = CappedTranslucency;
}
|
|
|
|
// TODO: most of the math below uses CocRadius as expressed in physical pixels, rather than scaling-independent 'encoded' units.
|
|
// It's not clear if the math is always accounting for this, as many of these expressions contain magic numbers.
|
|
|
|
|
|
// Computes a sample's weight from its Coc radius: 1 / (4 * PI * CocRadius^2), capped to the
// weight obtained for the Coc distance of 0.5 full res pixel.
float ComputeSampleWeight(float CocRadius)
{
	// Debugging aid: return 1 here to weight every sample equally.

	// Radius below which the weight stops growing.
	const float CapCocRadius = FullResPixelDistanceToCocDistance(0.5);
	const float WeightCap = rcp((4 * PI) * CapCocRadius * CapCocRadius);

	const float UncappedWeight = rcp((4 * PI) * CocRadius * CocRadius);

	// Disabled experiment: additionally divide the weight by max(1, CocRadius * 2).
	return min(UncappedWeight, WeightCap);
}
|
|
|
|
// Returns in [0; 1] how much a sample of Coc radius SampleCocRadius, located SampleDistance away,
// overlaps the output pixel.
float ComputeSampleIntersection(float SampleCocRadius, float SampleDistance)
{
	// Debugging aid: a hard edged test would be (SampleDistance < SampleCocRadius ? 1 : 0).

	// Slope of the linear ramp: 1 / pixel radius, times 2.0 because recombine is done at full resolution.
	const float RampSlope = 4.0;

	// Bias of the ramp: a Coc whose border is exactly at the sample distance yields a 50% overlap.
	const float RampBias = 0.5;

	// Lower bound on the abs Coc radius, so that SampleCocRadius=0 & SampleDistance=0 still returns 1.0.
	const float AbsCocRadiusFloor = 0.25;

	const float ClampedAbsCocRadius = max(abs(SampleCocRadius), AbsCocRadiusFloor);
	const float BorderDistance = ClampedAbsCocRadius - SampleDistance;

	// Linear overlap.
	const float LinearOverlap = saturate(BorderDistance * RampSlope + RampBias);

	// Pixels are approximated as disks, so smooth the ramp to make the intersection of two disks look better.
	return smoothstep(0, 1, LinearOverlap);
}
|
|
|
|
// Opacity used to transition to the background: 1 while CocRadius is at least one unit below
// MaxRecombineAbsCocRadius, fading linearly to 0 when it reaches MaxRecombineAbsCocRadius.
float ComputeBackgroundSampleOpacity(float CocRadius)
{
	// Disabled alternative: hard cut, (CocRadius < MAX_RECOMBINE_ABS_COC_RADIUS ? 1 : 0).
	const float RemainingCocRadius = MaxRecombineAbsCocRadius - CocRadius;

	return saturate(RemainingCocRadius);
}
|
|
|
|
// Returns in [0; 1] how much a sample is considered when gathering the background layer:
// its background opacity, faded out for Coc radii going from 0.25 down to 0.125.
float IsConsideredBackgroundSample(float CocRadius)
{
	const float BackgroundOpacity = ComputeBackgroundSampleOpacity(CocRadius);

	// 0 at CocRadius <= 0.125, 1 at CocRadius >= 0.25.
	const float OutOfFocusFade = saturate((CocRadius - 0.125) * 8);

	return BackgroundOpacity * OutOfFocusFade;
}
|
|
|
|
// Computes the opacity of the in focus sample: 1 for abs(CocRadius) <= 0.25, fading linearly to 0 at 0.5.
float ComputeInFocusOpacity(float CocRadius)
{
	// NOTE(review): a former TODO here read "should be 4*"; the factor used below already is 4.
	const float AbsCocRadius = abs(CocRadius);

	return saturate(2 - 4 * AbsCocRadius);
}
|
|
|
|
// Opacity used to transition foreground slight out of focus over in focus:
// 0 for CocRadius >= -0.125, rising linearly to 1 at CocRadius <= -0.25.
float ComputeForegroundSampleOpacity(float CocRadius)
{
	const float UnclampedOpacity = -1 - 8 * CocRadius;

	return saturate(UnclampedOpacity);
}
|
|
|
|
// Returns in [0; 1] how much a sample is considered when gathering the foreground layer:
// its foreground opacity, faded out as CocRadius approaches -MaxRecombineAbsCocRadius.
float IsConsideredForegroundSample(float CocRadius)
{
	const float ForegroundOpacity = ComputeForegroundSampleOpacity(CocRadius);

	// 0 once CocRadius <= -MaxRecombineAbsCocRadius.
	const float MaxCocFade = saturate(MaxRecombineAbsCocRadius + CocRadius);

	return ForegroundOpacity * MaxCocFade;
}
|
|
|
|
|
|
|
|
//------------------------------------------------------- ACCUMULATOR
|
|
|
|
/** One full resolution sample fed to the gathering accumulator. */
struct FGatherSample
{
	// Scene color of the sample (and optionally alpha channel).
	float4 Color;

	// Coc radius of the sample.
	float CocRadius;

	// Intersection of the sample with the output pixel, as returned by ComputeSampleIntersection().
	float Intersection;
};
|
|
|
|
/** Parameters of the full resolution gathering done in the recombine pass. */
struct FFullResGatherParameters
{
	// Radius of the gathering kernel, in FULL res pixels.
	float KernelPixelRadius;

	// Number of sample pairs to gather.
	uint SamplePairCount;
};
|
|
|
|
/** Running sums of the full resolution gathering done in the recombine pass. */
struct FFullResGatherAccumulator
{
	// Parameters of the full res gather.
	FFullResGatherParameters Parameters;

	// Sum of the weighted sample colors, and sum of the weights used.
	float4 Color;
	float ColorWeight;

	// Sum of the weighted sample opacities, and sum of the weights used.
	float Opacity;
	float OpacityWeight;

	// LAYER_PROCESSING_* value selecting how samples are interpreted.
	uint LayerProcessing;
};
|
|
|
|
// Creates an empty accumulator for GatherParameters, processing the background layer by default.
FFullResGatherAccumulator CreateFullResGatherAccumulator(in FFullResGatherParameters GatherParameters)
{
	// Zero every sum first, then fill in the fields that are not zero.
	FFullResGatherAccumulator EmptyAccumulator = (FFullResGatherAccumulator)0;

	EmptyAccumulator.Parameters = GatherParameters;
	EmptyAccumulator.LayerProcessing = LAYER_PROCESSING_BACKGROUND_ONLY;

	return EmptyAccumulator;
}
|
|
|
|
// Values derived from a FGatherSample for the layer being processed (see ComputeSampleDerivates()).
struct FGatherSampleDerivedParameters
{
	// Weight of the sample in the color sum.
	float Weight;

	// In [0; 1], how much the sample's color is taken into account for the layer.
	float IsConsidered;

	// Opacity contributed by the sample.
	float Opacity;
};
|
|
|
|
// Derives the weight, opacity and consideration factor of sample A for the layer the accumulator processes.
FGatherSampleDerivedParameters ComputeSampleDerivates(in FFullResGatherAccumulator Accumulator, in FGatherSample A)
{
	FGatherSampleDerivedParameters DerivedA;
	DerivedA.Weight = ComputeSampleWeight(A.CocRadius);

	// Fully initialize the result so a layer that is not handled below yields a sample that
	// contributes nothing, instead of returning uninitialized values.
	DerivedA.Opacity = 0;
	DerivedA.IsConsidered = 0;

	if (Accumulator.LayerProcessing == LAYER_PROCESSING_FOREGROUND_ONLY)
	{
		DerivedA.Opacity = ComputeForegroundSampleOpacity(A.CocRadius);
		DerivedA.IsConsidered = IsConsideredForegroundSample(A.CocRadius);
	}
	else if (Accumulator.LayerProcessing == LAYER_PROCESSING_FOREGROUND_HOLE_FILLING)
	{
		// Don't care about weight for hole filling
		DerivedA.Weight = 1;

		// Hole filling considers exactly what the foreground gathering does not.
		DerivedA.Opacity = ComputeForegroundSampleOpacity(A.CocRadius);
		DerivedA.IsConsidered = 1 - IsConsideredForegroundSample(A.CocRadius);
	}
	else if (Accumulator.LayerProcessing == LAYER_PROCESSING_SLIGHT_OUT_OF_FOCUS)
	{
		// Considered on both sides of the focus plane, as long as abs(CocRadius) is below the recombine limit.
		DerivedA.Opacity = ComputeBackgroundSampleOpacity(A.CocRadius);
		DerivedA.IsConsidered = saturate(MaxRecombineAbsCocRadius - abs(A.CocRadius));
	}
	else if (Accumulator.LayerProcessing == LAYER_PROCESSING_BACKGROUND_ONLY)
	{
		DerivedA.Opacity = ComputeBackgroundSampleOpacity(A.CocRadius);
		DerivedA.IsConsidered = IsConsideredBackgroundSample(A.CocRadius);
	}

	return DerivedA;
}
|
|
|
|
// Hole fills sample A with values of the closer sample of its mirror pair. What is copied depends
// on the layer being processed; the background layer copies nothing.
void HoleFillCloserSample(
	in FFullResGatherAccumulator Accumulator,
	inout FGatherSample A, inout FGatherSampleDerivedParameters DerivedA,
	in FGatherSample Closer, in FGatherSampleDerivedParameters DerivedCloser)
{
	const uint Layer = Accumulator.LayerProcessing;

	const bool bForegroundOnly = Layer == LAYER_PROCESSING_FOREGROUND_ONLY;
	const bool bHoleFilling = !bForegroundOnly && Layer == LAYER_PROCESSING_FOREGROUND_HOLE_FILLING;
	const bool bSlightOutOfFocus = Layer == LAYER_PROCESSING_SLIGHT_OUT_OF_FOCUS;

	// Foreground, hole filling and slight out of focus all inherit the closer sample's
	// intersection and weight.
	if (bForegroundOnly || bHoleFilling || bSlightOutOfFocus)
	{
		A.Intersection = Closer.Intersection;
		DerivedA.Weight = DerivedCloser.Weight;
	}

	// Only hole filling also inherits the opacity.
	// Disabled alternatives for LAYER_PROCESSING_FOREGROUND_ONLY (superseded by the use of
	// LAYER_PROCESSING_FOREGROUND_HOLE_FILLING): also copy IsConsidered and Opacity from the closer
	// sample, or multiply them by the background opacity, which looks nice over slight out of
	// focus but bad over large background out of focus.
	if (bHoleFilling)
	{
		DerivedA.Opacity = DerivedCloser.Opacity;
	}
}
|
|
|
|
// Adds sample A, with its derived parameters, to the accumulator's color and opacity sums.
void AccumulateSample(
	inout FFullResGatherAccumulator Accumulator,
	in FGatherSample A,
	in FGatherSampleDerivedParameters DerivedA)
{
	const float ColorWeight = A.Intersection * DerivedA.Weight * DerivedA.IsConsidered;
	const float OpacityWeight = A.Intersection;

	float SampleOpacity = DerivedA.Opacity;

	// Every layer except slight out of focus scales the opacity by the sample's weight relative to
	// the weight of a Coc of half the kernel radius. This works really well to have smaller out of
	// focus than the gathering kernel.
	if (Accumulator.LayerProcessing != LAYER_PROCESSING_SLIGHT_OUT_OF_FOCUS)
	{
		SampleOpacity *= DerivedA.Weight * rcp(ComputeSampleWeight(Accumulator.Parameters.KernelPixelRadius * 0.5));
	}

	Accumulator.Color += ColorWeight * A.Color;
	Accumulator.ColorWeight += ColorWeight;

	Accumulator.Opacity += OpacityWeight * SampleOpacity;
	Accumulator.OpacityWeight += OpacityWeight;
}
|
|
|
|
/** Accumulates a pair of samples mirrored around the output pixel, hole filling the farther one with the closer one. */
void AccumulateMirrorSamples(inout FFullResGatherAccumulator Accumulator, in FGatherSample S[2])
{
	FGatherSampleDerivedParameters Derived0 = ComputeSampleDerivates(Accumulator, S[0]);
	FGatherSampleDerivedParameters Derived1 = ComputeSampleDerivates(Accumulator, S[1]);

	// Mirror hole filling: the sample with the larger Coc radius gets amended with the values of
	// the one with the smaller Coc radius. Nothing happens when both radii are equal.
	// Disabled alternative: only do so when the sample with the smaller Coc radius is foreground.
	if (S[1].CocRadius > S[0].CocRadius)
	{
		HoleFillCloserSample(Accumulator, S[1], Derived1, S[0], Derived0);
	}
	else if (S[0].CocRadius > S[1].CocRadius)
	{
		HoleFillCloserSample(Accumulator, S[0], Derived0, S[1], Derived1);
	}

	AccumulateSample(Accumulator, S[0], Derived0);
	AccumulateSample(Accumulator, S[1], Derived1);
}
|
|
|
|
/** Accumulates the sample located on the output pixel itself. */
void AccumulateCenterSample(inout FFullResGatherAccumulator Accumulator, in FGatherSample A)
{
	FGatherSampleDerivedParameters CenterDerived = ComputeSampleDerivates(Accumulator, A);

	// Always consider the center sample, to guarantee there is a resolved color if in focus or background.
	CenterDerived.IsConsidered = 1;

	AccumulateSample(Accumulator, A, CenterDerived);
}
|
|
|
|
/**
 * Resolves the accumulator into an unpremultiplied color and an opacity.
 *
 * OutGatherBackgroundUnpremultipliedColor: accumulated color divided by the accumulated color weight.
 * OutGatherBackgroundOpacity: accumulated opacity divided by the accumulated opacity weight, clamped
 *   to [0; 1], or 0 when no color was accumulated.
 */
void ResolveAccumulator(
	in FFullResGatherAccumulator Accumulator,
	out float4 OutGatherBackgroundUnpremultipliedColor,
	out float OutGatherBackgroundOpacity)
{
	// All layers normalize the opacity by the accumulated opacity weight.
	// Disabled alternative: normalize by the sample count, 1 + 2 * SamplePairCount.
	const float Opacity = saturate(Accumulator.Opacity * SafeRcp(Accumulator.OpacityWeight));

	OutGatherBackgroundOpacity = Accumulator.ColorWeight > 0 ? Opacity : 0;
	OutGatherBackgroundUnpremultipliedColor = Accumulator.Color * (SafeRcp(Accumulator.ColorWeight));
}
|
|
|
|
|
|
//------------------------------------------------------- KERNEL
|
|
|
|
#if CONFIG_SLIGHT_FOCUS_METHOD != SLIGHT_FOCUS_METHOD_DISABLED
|
|
|
|
// Fetches the two full res samples located at +PixelOffset and -PixelOffset from the output pixel,
// and accumulates them as a mirror pair.
void FetchAndAccumulateSamplePair(
	in const FRecombineInputParameters InputParameters,
	in float2 PixelOffset,
	inout FFullResGatherAccumulator Accumulator)
{
	// Without bokeh simulation, round the offset to whole pixels so the intersection gets evaluated
	// at the center of the fetched pixel. With bokeh simulation, the distance comes from a look up table instead.
	#if DIM_BOKEH_SIMULATION == BOKEH_SIMULATION_DISABLED
		PixelOffset = sign(PixelOffset) * floor(abs(PixelOffset) + 0.5);
	#endif

	// Distance of the samples from the output pixel, converted from full res pixels to Coc distance.
	float SampleDistance = FullResPixelDistanceToCocDistance(length(PixelOffset));

	// Scene buffer UV offset of the +PixelOffset sample.
	const float2 PositiveUVOffset = PixelOffset * View.BufferSizeAndInvSize.zw * float2(CocInvSqueeze, 1.0);

	FGatherSample MirrorSamples[2];

	UNROLL
	for (uint SampleId = 0; SampleId < 2; SampleId++)
	{
		// +1 for the first sample, -1 for its mirror.
		const float MirrorSign = (SampleId == 0) ? 1.0 : -1.0;

		// Fetch SampleDistance from the lookup table. The mirror sample reuses the first sample's
		// distance, unless the general bokeh simulation is used.
		#if DIM_BOKEH_SIMULATION != BOKEH_SIMULATION_DISABLED
			if (SampleId == 0 || DIM_BOKEH_SIMULATION == BOKEH_SIMULATION_GENERAL)
			{
				const float InvLutSize = rcp(float(BOKEH_LUT_SIZE));

				const float2 LutUV = (0.5 + 0.5 * InvLutSize) + PixelOffset * (MirrorSign * InvLutSize) * float2(CocInvSqueeze, 1.0);

				SampleDistance = BokehLUT.SampleLevel(GlobalPointClampedSampler, LutUV, 0).x;
			}
		#endif

		// TODO: the UV is always clamped to the view's bilinear min/max for now.
		const float2 SampleUV = clamp(
			InputParameters.SceneBufferUV + MirrorSign * PositiveUVOffset,
			View.BufferBilinearUVMinMax.xy,
			View.BufferBilinearUVMinMax.zw);

		FGatherSample Fetched;

		// Fetch full res color and CocRadius; the latter either comes from the color's alpha or from scene depth.
		Fetched.Color = SceneColorInput.SampleLevel(GlobalPointClampedSampler, SampleUV, 0);
		#if CONFIG_FETCH_FULLRES_COC_FROM_ALPHA
			Fetched.CocRadius = Fetched.Color.a * EncodedCocRadiusToRecombineCocRadius;
		#else
			Fetched.CocRadius = SceneDepthToCocRadius(SampleWorldDepth(SampleUV));
		#endif

		// Convert scene color alpha from translucency to opacity.
		Fetched.Color.a = 1 - Fetched.Color.a;

		// The background only permutation treats negative Coc radii as in focus.
		#if DIM_LAYER_PROCESSING == LAYER_PROCESSING_BACKGROUND_ONLY
			Fetched.CocRadius = max(Fetched.CocRadius, 0);
		#endif

		Fetched.Intersection = ComputeSampleIntersection(Fetched.CocRadius, SampleDistance);

		MirrorSamples[SampleId] = Fetched;
	}

	AccumulateMirrorSamples(Accumulator, MirrorSamples);
}
|
|
|
|
// Gathers GatherParameters.SamplePairCount pairs of full res samples, spread over a disk of
// GatherParameters.KernelPixelRadius pixels around the output pixel, into the accumulator.
void GatherToAccumulator(
	in const FRecombineInputParameters InputParameters,
	in const FFullResGatherParameters GatherParameters,
	inout FFullResGatherAccumulator Accumulator)
{
#if 0 // brute force the gathering kernel.
	int QuadSize = 0 * 2 * MAX_RECOMBINE_ABS_COC_RADIUS;

	UNROLL
	for (int x = -QuadSize; x <= QuadSize; x++)
	UNROLL
	for (int y = 0; y <= QuadSize; y++)
	{
		if (y == 0 && x <= 0)
		{
			continue;
		}

		const float2 PixelOffset = float2(x, y);
		const float PixelDistance = length(PixelOffset);

		if (PixelDistance > QuadSize)
		{
			continue;
		}

		FetchAndAccumulateSamplePair(InputParameters, PixelOffset, Accumulator);
	}
#else
	// Samples at full resolution.
	LOOP
	for (uint PairIndex = 0; PairIndex < GatherParameters.SamplePairCount; PairIndex++)
	{
		// NOTE(review): the sequence is generated for CONFIG_GATHER_PAIR_COUNT points although only
		// SamplePairCount of them are consumed - confirm this is intended.
		const float2 UnitSquareSample = Hammersley16(PairIndex, CONFIG_GATHER_PAIR_COUNT, InputParameters.Seed0);
		const float2 UnitDiskSample = UniformSampleDiskConcentricApprox(UnitSquareSample);

		float2 PixelOffset = GatherParameters.KernelPixelRadius * UnitDiskSample;

		// The center pixel is already sampled, and there is no point sampling it again with very small Coc.
		// Disabled experiment: push such offsets away so they do not sample the center again.
		#if 0
			//FLATTEN
			if (any(abs(PixelOffset) <= 0.5))
			{
				PixelOffset = clamp(PixelOffset * SafeRcp(max(abs(PixelOffset.x), abs(PixelOffset.y))), -1, 1);
			}
		#endif

		FetchAndAccumulateSamplePair(InputParameters, PixelOffset, Accumulator);
	}
#endif
}
|
|
|
|
#endif // CONFIG_SLIGHT_FOCUS_METHOD != SLIGHT_FOCUS_METHOD_DISABLED
|
|
|
|
|
|
//------------------------------------------------------- ENTRY POINT
|
|
|
|
#if NEIGHBORHOOD_ANALISIS_METHOD == NEIGHBORHOOD_ANALISIS_ATOMIC
|
|
|
|
groupshared uint SharedMaxConsideredAbsCocRadius;
|
|
|
|
|
|
#elif NEIGHBORHOOD_ANALISIS_METHOD == NEIGHBORHOOD_ANALISIS_LDS_REDUCE
|
|
|
|
groupshared float SharedMaxConsideredAbsCocRadius[GROUP_BORDER_SIZE * GROUP_BORDER_SIZE];
|
|
|
|
|
|
#endif
|
|
|
|
[numthreads(GROUP_BORDER_SIZE, GROUP_BORDER_SIZE, 1)]
|
|
void RecombineMainCS(
|
|
uint2 DispatchThreadId : SV_DispatchThreadID,
|
|
uint2 GroupThreadId : SV_GroupThreadID,
|
|
uint GroupThreadIndex : SV_GroupIndex)
|
|
{
|
|
float4 Debug = 0;
|
|
|
|
// Setup input parameters.
|
|
FRecombineInputParameters InputParameters;
|
|
{
|
|
InputParameters.DOFBufferSize = ConvolutionInputSize;
|
|
|
|
InputParameters.ViewportUV = (DispatchThreadId + 0.5) * ViewportSize.zw;
|
|
InputParameters.SceneBufferUV = ViewportUVToBufferUV(InputParameters.ViewportUV);
|
|
|
|
if (CONFIG_CLAMP_SCENE_BUFFER_UV)
|
|
{
|
|
InputParameters.SceneBufferUV = clamp(InputParameters.SceneBufferUV, View.BufferBilinearUVMinMax.xy, View.BufferBilinearUVMinMax.zw);
|
|
}
|
|
|
|
// - 0.5 * TemporalJitterPixels because DOF buffer is non temporally jittering, thanks to half res TAA pass.
|
|
InputParameters.DOFBufferUV = ApplyScreenTransform(float2(DispatchThreadId), DispatchThreadIdToDOFBufferUV);
|
|
|
|
if (CONFIG_CLAMP_DOF_BUFFER_UV)
|
|
{
|
|
InputParameters.DOFBufferUV = min(InputParameters.DOFBufferUV, DOFBufferUVMax);
|
|
}
|
|
|
|
InputParameters.Seed0 = Rand3DPCG16(int3(DispatchThreadId, View.StateFrameIndexMod8)).xy;
|
|
}
|
|
|
|
//Fetch foreground layer first to early return if ForegroundTranslucency == 0.0.
|
|
float4 ForegroundColor;
|
|
float ForegroundTranslucency;
|
|
{
|
|
#if DIM_LAYER_PROCESSING == LAYER_PROCESSING_BACKGROUND_ONLY
|
|
ForegroundColor = 0;
|
|
ForegroundTranslucency = 1;
|
|
|
|
#elif CONFIG_DOF_ALPHA
|
|
// Sample premultiplied RGBA foreground.
|
|
ForegroundColor = ForegroundConvolution_SceneColor.SampleLevel(
|
|
GlobalBilinearClampedSampler, InputParameters.DOFBufferUV, 0);
|
|
ForegroundTranslucency = 1 - ForegroundConvolution_SeparateAlpha.SampleLevel(
|
|
GlobalBilinearClampedSampler, InputParameters.DOFBufferUV, 0).r;
|
|
#else
|
|
// Sample premultiplied RGBA foreground.
|
|
ForegroundColor = ForegroundConvolution_SceneColor.SampleLevel(
|
|
GlobalBilinearClampedSampler, InputParameters.DOFBufferUV, 0);
|
|
ForegroundTranslucency = 1 - ForegroundColor.a;
|
|
|
|
#endif
|
|
}
|
|
|
|
|
|
// Get full res color and coc radius.
|
|
FGatherSample CenterSample;
|
|
CenterSample.Color = SceneColorInput.SampleLevel(GlobalPointClampedSampler, InputParameters.SceneBufferUV, 0);
|
|
CenterSample.Intersection = 1.0;
|
|
|
|
#if CONFIG_FETCH_FULLRES_COC_FROM_ALPHA
|
|
CenterSample.CocRadius = CenterSample.Color.a * EncodedCocRadiusToRecombineCocRadius;
|
|
#else
|
|
CenterSample.CocRadius = SceneDepthToCocRadius(SampleWorldDepth(InputParameters.SceneBufferUV));
|
|
#endif
|
|
|
|
// Convert scene color alpha from translucency to opacity.
|
|
CenterSample.Color.a = 1 - CenterSample.Color.a;
|
|
|
|
// Whether can display solly foreground.
|
|
bool bCanReturnForegroundOnly = ForegroundTranslucency < OPACITY_EPSILON;
|
|
|
|
// Group constant: Whether should do full resolution gathering for slight out of focus.
|
|
bool bGatherFullRes = false;
|
|
|
|
#if CONFIG_SLIGHT_FOCUS_METHOD != SLIGHT_FOCUS_METHOD_DISABLED
|
|
// Full resolution gather's parameters.
|
|
FFullResGatherParameters GatherParameters;
|
|
{
|
|
#if NEIGHBORHOOD_ANALISIS_METHOD == NEIGHBORHOOD_ANALISIS_ATOMIC
|
|
{
|
|
SharedMaxConsideredAbsCocRadius = 0;
|
|
GroupMemoryBarrierWithGroupSync();
|
|
}
|
|
#endif
|
|
|
|
|
|
// Grab the smallest slightly out of focus Coc radius of the tile.
|
|
float TileMaxConsideredAbsCocRadius;
|
|
{
|
|
float MaxConsideredAbsCocRadius = abs(CenterSample.CocRadius) < MaxRecombineAbsCocRadius ? abs(CenterSample.CocRadius) : 0;
|
|
|
|
for (uint j = 0; j < 4; j++)
|
|
{
|
|
const float2 SamplePixelOffset = float2(kOffsetsCross3x3[j]) * CocDistanceToFullResPixelDistance(MaxRecombineAbsCocRadius);
|
|
|
|
float2 SampleUVOffset = View.BufferSizeAndInvSize.zw * SamplePixelOffset;
|
|
float2 SampleUV = InputParameters.SceneBufferUV + SampleUVOffset;
|
|
|
|
if (CONFIG_CLAMP_SCENE_BUFFER_UV)
|
|
{
|
|
SampleUV = clamp(SampleUV, View.BufferBilinearUVMinMax.xy, View.BufferBilinearUVMinMax.zw);
|
|
}
|
|
|
|
#if CONFIG_FETCH_FULLRES_COC_FROM_ALPHA
|
|
float SampleCocRadius = SceneColorInput.SampleLevel(GlobalPointClampedSampler, SampleUV, 0).a * EncodedCocRadiusToRecombineCocRadius;
|
|
#else
|
|
float SampleCocRadius = SceneDepthToCocRadius(SampleWorldDepth(SampleUV));
|
|
#endif
|
|
|
|
float SampleAbsCocRadius = abs(SampleCocRadius);
|
|
|
|
#if 0
|
|
// Compute the minimum CocRadius to overlap with the group's tile, to reduce amount of tiles gathering uselessly.
|
|
// TODO: have not witnessed any performance regression or improvement with this yet.
|
|
{
|
|
// Compute the minimum CocRadius to overlap with the group's tile.
|
|
float2 ThreadDistanceToGroupBorder = lerp(GroupThreadId, (GROUP_BORDER_SIZE - 1) - GroupThreadId, kSquare2x2[j]);
|
|
float2 OutsideGroupPixelOffset = abs(SamplePixelOffset) - ThreadDistanceToGroupBorder;
|
|
float2 OutsideGroupCocOffset = FullResPixelDistanceToCocDistance(OutsideGroupPixelOffset);
|
|
|
|
float MinCocRadiusSquare = dot(OutsideGroupCocOffset, OutsideGroupCocOffset);
|
|
|
|
// Not interested if the CocRadius is too large, or does not overlap with the group's tile.
|
|
if (SampleAbsCocRadius < MaxRecombineAbsCocRadius &&
|
|
SampleAbsCocRadius * SampleAbsCocRadius > MinCocRadiusSquare)
|
|
{
|
|
MaxConsideredAbsCocRadius = max(MaxConsideredAbsCocRadius, SampleAbsCocRadius);
|
|
}
|
|
}
|
|
#else
|
|
{
|
|
MaxConsideredAbsCocRadius = max(MaxConsideredAbsCocRadius, SampleAbsCocRadius < MaxRecombineAbsCocRadius ? SampleAbsCocRadius : MaxConsideredAbsCocRadius);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
#if NEIGHBORHOOD_ANALISIS_METHOD == NEIGHBORHOOD_ANALISIS_ATOMIC
|
|
{
|
|
// Do atomic min and max of the positive or null float MaxConsideredAbsCocRadius
|
|
// as if they were uint.
|
|
uint Unused;
|
|
InterlockedMax(SharedMaxConsideredAbsCocRadius, asuint(MaxConsideredAbsCocRadius), Unused);
|
|
|
|
GroupMemoryBarrierWithGroupSync();
|
|
|
|
// Read atomic counters.
|
|
TileMaxConsideredAbsCocRadius = asfloat(SharedMaxConsideredAbsCocRadius);
|
|
}
|
|
#elif NEIGHBORHOOD_ANALISIS_METHOD == NEIGHBORHOOD_ANALISIS_LDS_REDUCE
|
|
{
|
|
SharedMaxConsideredAbsCocRadius[GroupThreadIndex] = MaxConsideredAbsCocRadius;
|
|
GroupMemoryBarrierWithGroupSync();
|
|
|
|
// Safe for vector sizes 32 or larger, AMD and NV
|
|
// TODO Intel variable size vector
|
|
UNROLL
|
|
for (uint i = 0; i < 5; i++)
|
|
{
|
|
const uint ReduceSize = 32u >> i;
|
|
if (GroupThreadIndex < ReduceSize)
|
|
{
|
|
MaxConsideredAbsCocRadius = max(MaxConsideredAbsCocRadius, SharedMaxConsideredAbsCocRadius[GroupThreadIndex + ReduceSize]);
|
|
SharedMaxConsideredAbsCocRadius[GroupThreadIndex] = MaxConsideredAbsCocRadius;
|
|
}
|
|
}
|
|
|
|
TileMaxConsideredAbsCocRadius = SharedMaxConsideredAbsCocRadius[0];
|
|
}
|
|
#elif NEIGHBORHOOD_ANALISIS_METHOD == NEIGHBORHOOD_ANALISIS_WAVE
|
|
{
|
|
TileMaxConsideredAbsCocRadius = WaveActiveMax(MaxConsideredAbsCocRadius);
|
|
}
|
|
#else
|
|
#error Unknown neighborhood analisis method to use.
|
|
#endif
|
|
}
|
|
|
|
// Determines what should be done.
|
|
{
|
|
// Gather at full resolution only if we know there is considered neighborhood that a COC radius big enough.
|
|
bGatherFullRes = TileMaxConsideredAbsCocRadius > 0.125;
|
|
|
|
// No need to gather at full res for this pixel if totally occluded by foreground.
|
|
bGatherFullRes = bGatherFullRes && !bCanReturnForegroundOnly;
|
|
}
|
|
|
|
// Set up gathering parameters.
|
|
{
|
|
float FullResKernelRadius = CocDistanceToFullResPixelDistance(TileMaxConsideredAbsCocRadius);
|
|
|
|
// Set the size of the kernel to exactly the max convolution that needs to be done.
|
|
GatherParameters.KernelPixelRadius = ceil(FullResKernelRadius);
|
|
|
|
// Increase the size of the kernel radius to avoid the gather Point sampler to create Coc step artifacts.
|
|
GatherParameters.KernelPixelRadius += 0.5;
|
|
|
|
float KenelArea = Pow2(FullResKernelRadius) * PI;
|
|
|
|
float RecommendedPairCount = KenelArea * 0.5;
|
|
|
|
// Number of pair of sample.
|
|
GatherParameters.SamplePairCount = min(uint(CONFIG_GATHER_PAIR_COUNT), uint(RecommendedPairCount));
|
|
}
|
|
}
|
|
#endif // CONFIG_SLIGHT_FOCUS_METHOD != SLIGHT_FOCUS_METHOD_DISABLED
|
|
|
|
if (any((ViewportRect.xy + DispatchThreadId) >= ViewportRect.zw))
|
|
{
|
|
return;
|
|
}
|
|
|
|
float GatherBackgroundOpacity = ComputeInFocusOpacity(CenterSample.CocRadius);
|
|
float4 GatherBackgroundUnpremultipliedColor = CenterSample.Color;
|
|
|
|
#if CONFIG_SLIGHT_FOCUS_METHOD == SLIGHT_FOCUS_METHOD_DISABLED
|
|
{
|
|
GatherBackgroundOpacity = ComputeBackgroundSampleOpacity(CenterSample.CocRadius);
|
|
}
|
|
#elif CONFIG_SLIGHT_FOCUS_METHOD == SLIGHT_FOCUS_METHOD_UNIQUE_CONVOLUTIONS
|
|
BRANCH
|
|
if (bGatherFullRes)
|
|
{
|
|
// Full resolution's opacity with background.
|
|
FFullResGatherAccumulator Accumulator = CreateFullResGatherAccumulator(GatherParameters);
|
|
Accumulator.LayerProcessing = LAYER_PROCESSING_SLIGHT_OUT_OF_FOCUS;
|
|
|
|
// Accumulate center sample first to reduce VGPR pressure.
|
|
AccumulateCenterSample(Accumulator, CenterSample);
|
|
|
|
// TODO: Adaptive number of samples.
|
|
GatherToAccumulator(
|
|
InputParameters, GatherParameters,
|
|
Accumulator);
|
|
|
|
// The full resolution gathering kernel is sampling directly the full res scene color, that is jittering
|
|
// and potentially flickering on spec hits. To avoid issues with TAA's clamping box, we clamp this with the
|
|
// prefiltering scene color for temporal stability.
|
|
//
|
|
// TODO: should this be done in YCoCg or LCoCg?
|
|
#if CONFIG_CLAMP_FULLRES_GATHER
|
|
{
|
|
float4 Min;
|
|
float4 Max;
|
|
|
|
float2 ClampUVBox = InputParameters.DOFBufferSize.zw;
|
|
|
|
UNROLL
|
|
for (uint i = 0; i < 4; i++)
|
|
{
|
|
float2 SampleUV = InputParameters.DOFBufferUV + (0.5 * kOffsetsCross3x3[i]) * ClampUVBox;
|
|
|
|
if (CONFIG_CLAMP_DOF_BUFFER_UV)
|
|
{
|
|
SampleUV = min(SampleUV, DOFBufferUVMax);
|
|
}
|
|
|
|
float4 StableSampleColor = SlightOutOfFocusConvolution_SceneColor.SampleLevel(GlobalPointClampedSampler, SampleUV, 0);
|
|
|
|
if (i == 0)
|
|
{
|
|
Min = Max = StableSampleColor;
|
|
}
|
|
else
|
|
{
|
|
Min = min(Min, StableSampleColor);
|
|
Max = max(Max, StableSampleColor);
|
|
}
|
|
}
|
|
|
|
// TODO: Increase contrast of the limits a little to work around too strong denoise at near-in-focus (stolen from CircleDOF).
|
|
#if 1
|
|
{
|
|
float4 HD = Min;
|
|
float Small = 0.125 * (1.0 - saturate(CenterSample.CocRadius * CenterSample.CocRadius * rcp(64.0)));
|
|
Max += HD * Small;
|
|
Min -= HD * Small;
|
|
|
|
// Ensures the temporally stable opacity remains between 0-1.
|
|
// Uses saturate() instead of min() and max() to be optimised as
|
|
// saturate() MAD post modifier on GCN.
|
|
Min.a = saturate(Min.a);
|
|
Max.a = saturate(Max.a);
|
|
}
|
|
#endif
|
|
|
|
float ClampWeight = saturate(CenterSample.CocRadius * CenterSample.CocRadius * 4.0);
|
|
|
|
// Clamp color
|
|
float3 ClampedColor = clamp(Accumulator.Color.rgb, Min.rgb * Accumulator.ColorWeight, Max.rgb * Accumulator.ColorWeight);
|
|
|
|
Accumulator.Color.rgb = lerp(Accumulator.Color.rgb, ClampedColor, ClampWeight);
|
|
|
|
// Clamp opacity.
|
|
float ClampedOpacity = clamp(Accumulator.Opacity, Min.a * Accumulator.OpacityWeight, Max.a * Accumulator.OpacityWeight);
|
|
|
|
Accumulator.Opacity = lerp(Accumulator.Opacity, ClampedOpacity, ClampWeight);
|
|
}
|
|
#endif
|
|
|
|
// Resolve full res gather.
|
|
ResolveAccumulator(Accumulator, GatherBackgroundUnpremultipliedColor, GatherBackgroundOpacity);
|
|
}
|
|
#endif
|
|
|
|
// Compose lower res foreground with full res gather foreground.
|
|
float4 GatherForegroundAdditiveColor = ForegroundColor;
|
|
float GatherForegroundTranslucency = ForegroundTranslucency;
|
|
|
|
// Sample lower res background, if necessary.
|
|
float4 BackgroundColor = 0.0;
|
|
float BackgroundValidity = 0.0;
|
|
|
|
// Separate foreground hole filling, exposed mainly for debugging purposes.
|
|
float4 HoleFillingAdditiveColor = 0;
|
|
float HoleFillingTranslucency = 1;
|
|
|
|
BRANCH
|
|
if ((GatherForegroundTranslucency < OPACITY_EPSILON || bCanReturnForegroundOnly) && 0)
|
|
{
|
|
GatherForegroundAdditiveColor *= SafeRcp(1 - GatherForegroundTranslucency);
|
|
GatherForegroundTranslucency = 0;
|
|
}
|
|
else
|
|
{
|
|
#if CONFIG_COMPOSITING_METHOD == COMPOSITING_METHOD_BILINEAR_BKG
|
|
{
|
|
BackgroundColor = BackgroundConvolution_SceneColor.SampleLevel(GlobalBilinearClampedSampler, InputParameters.DOFBufferUV, 0);
|
|
|
|
#if CONFIG_DOF_ALPHA
|
|
BackgroundValidity = BackgroundConvolution_SeparateAlpha.SampleLevel(GlobalBilinearClampedSampler, InputParameters.DOFBufferUV, 0).r;
|
|
#else
|
|
//Background = float4(0, 0, 0, 1);
|
|
BackgroundValidity = BackgroundColor.a;
|
|
#endif
|
|
|
|
// Make sure the background color is always normalized, or unrendered.
|
|
BackgroundColor *= SafeRcp(BackgroundValidity);
|
|
}
|
|
#endif
|
|
|
|
// Hole fill the background in output final scene color before composing foreground on top.
|
|
#if CONFIG_HOLE_FILLING_METHOD == HOLE_FILLING_METHOD_SEPARATE_GATHER
|
|
{
|
|
HoleFillingAdditiveColor = ForegroundHoleFillingConvolution_SceneColor.SampleLevel(GlobalBilinearClampedSampler, InputParameters.DOFBufferUV, 0);
|
|
|
|
#if CONFIG_DOF_ALPHA
|
|
HoleFillingTranslucency = ForegroundHoleFillingConvolution_SeparateAlpha.SampleLevel(GlobalBilinearClampedSampler, InputParameters.DOFBufferUV, 0).r;
|
|
#else
|
|
HoleFillingTranslucency = HoleFillingAdditiveColor.a;
|
|
#endif
|
|
|
|
float MaxTranslucency = 1 - IsConsideredForegroundSample(CenterSample.CocRadius);
|
|
|
|
// Force the hole filling translucency to 0 if the background is unrendered.
|
|
if (BackgroundValidity <= 0.001)
|
|
{
|
|
if (HoleFillingTranslucency < 1.0)
|
|
{
|
|
float MaxTranslucency = 1 - IsConsideredForegroundSample(CenterSample.CocRadius);
|
|
|
|
MaxTranslucency = min(MaxTranslucency, BackgroundValidity);
|
|
AmendAdditiveColorWithMaxTranslucency(HoleFillingAdditiveColor, HoleFillingTranslucency, MaxTranslucency);
|
|
}
|
|
else
|
|
{
|
|
GatherBackgroundOpacity = 1;
|
|
}
|
|
}
|
|
|
|
BackgroundColor = BackgroundColor * HoleFillingTranslucency + HoleFillingAdditiveColor;
|
|
}
|
|
#else
|
|
if (BackgroundValidity <= 0.001)
|
|
{
|
|
GatherBackgroundOpacity = 1;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
// Compose background lower res gather and full res gather.
|
|
float4 OutputFinalSceneColor = BackgroundColor * (1 - GatherBackgroundOpacity) + GatherBackgroundOpacity * GatherBackgroundUnpremultipliedColor;
|
|
|
|
// Forces foreground translucency to 0 when large out of focus high res foreground, to
|
|
// avoid background leaking.
|
|
#if CONFIG_HOLE_FILLING_METHOD == HOLE_FILLING_METHOD_OPACITY_AMEND
|
|
if (GatherForegroundTranslucency < 1 && BackgroundValidity != 1.0)
|
|
{
|
|
#if CONFIG_GATHER_PAIR_COUNT == 0
|
|
float MaxTranslucency = saturate(MaxRecombineAbsCocRadius + CenterSample.CocRadius);
|
|
#else
|
|
float MaxTranslucency = 1 - ComputeForegroundSampleOpacity(CenterSample.CocRadius);
|
|
#endif
|
|
|
|
AmendAdditiveColorWithMaxTranslucency(GatherForegroundAdditiveColor, GatherForegroundTranslucency, MaxTranslucency);
|
|
}
|
|
#endif
|
|
|
|
// Compose foreground.
|
|
OutputFinalSceneColor = OutputFinalSceneColor * GatherForegroundTranslucency + GatherForegroundAdditiveColor;
|
|
|
|
// Compose separate translucency.
|
|
if (1)
|
|
{
|
|
NearestDepthNeighborUpsamplingResult UpsampleResult;
|
|
if (SeparateTranslucencyUpscaling == 0)
|
|
{
|
|
UpsampleResult.bUsePointSampler = true;
|
|
UpsampleResult.UV = InputParameters.SceneBufferUV;
|
|
}
|
|
else
|
|
{
|
|
float2 PixelPos = (View.ViewRectMin.xy + DispatchThreadId) + 0.5;
|
|
UpsampleResult = NearestDepthNeighborUpsampling(
|
|
LowResDepthTexture,
|
|
FullResDepthTexture,
|
|
PixelPos,
|
|
InputParameters.SceneBufferUV,
|
|
SeparateTranslucencyTextureLowResExtentInverse);
|
|
}
|
|
|
|
UpsampleResult.UV = clamp(UpsampleResult.UV, SeparateTranslucencyBilinearUVMinMax.xy, SeparateTranslucencyBilinearUVMinMax.zw);
|
|
|
|
float4 SeparateTranslucencyColor = 0;
|
|
float4 SeparateTranslucencyModulateColor = 0;
|
|
if (UpsampleResult.bUsePointSampler)
|
|
{
|
|
SeparateTranslucencyColor = SceneSeparateTranslucency.SampleLevel(GlobalPointClampedSampler, UpsampleResult.UV, 0);
|
|
SeparateTranslucencyModulateColor = SceneSeparateTranslucencyModulateColor.SampleLevel(GlobalPointClampedSampler, UpsampleResult.UV, 0);
|
|
}
|
|
else
|
|
{
|
|
SeparateTranslucencyColor = SceneSeparateTranslucency.SampleLevel(GlobalBilinearClampedSampler, UpsampleResult.UV, 0);
|
|
SeparateTranslucencyModulateColor = SceneSeparateTranslucencyModulateColor.SampleLevel(GlobalBilinearClampedSampler, UpsampleResult.UV, 0);
|
|
}
|
|
float SeparateTranslucencyBackgroundVisibility = SeparateTranslucencyColor.a;
|
|
float GreyScaleModulateColorBackgroundVisibility = dot(SeparateTranslucencyModulateColor.rgb, float3(1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f));
|
|
|
|
// This matches what is done in ComposeSeparateTranslucency.usf
|
|
OutputFinalSceneColor.rgb = OutputFinalSceneColor.rgb * SeparateTranslucencyBackgroundVisibility * SeparateTranslucencyModulateColor.rgb + SeparateTranslucencyColor.rgb;
|
|
// Also stores BackgroundVisibility (=transmittance) in alpha
|
|
float FinalSceneVisibility = 1.0 - OutputFinalSceneColor.a;
|
|
OutputFinalSceneColor.a = FinalSceneVisibility * SeparateTranslucencyBackgroundVisibility * GreyScaleModulateColorBackgroundVisibility;
|
|
|
|
// Convert from visibility to coverage to comply with the following process
|
|
OutputFinalSceneColor.a = 1.0f - OutputFinalSceneColor.a;
|
|
}
|
|
|
|
// Convert alpha channel from opacity back to translucency.
|
|
OutputFinalSceneColor.a = 1 - OutputFinalSceneColor.a;
|
|
|
|
// Ensure that alpha values that are expected to be opaque (but are only close to opaque) are forced to be opaque.
|
|
// (0.005 chosen to accommodate handling of 1/255)
|
|
#if CONFIG_DOF_ALPHA
|
|
OutputFinalSceneColor.a = select(OutputFinalSceneColor.a < 0.005, 0.0, OutputFinalSceneColor.a);
|
|
OutputFinalSceneColor.a = select(OutputFinalSceneColor.a > 0.995, 1.0, OutputFinalSceneColor.a);
|
|
#endif
|
|
|
|
// Debug optimisation colors.
|
|
#if 0
|
|
{
|
|
float3 DebugColor;
|
|
|
|
if (bGatherFullRes)
|
|
{
|
|
// RED: Full res gather.
|
|
Debug = float4(1.0, 0.0, 0.0, 0.0);
|
|
}
|
|
else if (bCanReturnForegroundOnly)
|
|
{
|
|
// GREEN: Foreground is the cheapest.
|
|
Debug = float4(0.0, 1.0, 0.0, 0.0);
|
|
}
|
|
else
|
|
{
|
|
// BLUE: Fetch foreground and background.
|
|
Debug = float4(0.0, 0.0, 1.0, 0.0);
|
|
}
|
|
}
|
|
#elif 0
|
|
{
|
|
if (bGatherFullRes)
|
|
{
|
|
Debug = float4(1.0, 0.0, 0.0, 0.0);
|
|
}
|
|
else
|
|
{
|
|
Debug = float4(0.0, 1.0, 0.0, 0.0);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#if 1 // Lower VGPR footprint.
|
|
uint2 OutputPixelPosition = InputParameters.SceneBufferUV * View.BufferSizeAndInvSize.xy;
|
|
#else
|
|
uint2 OutputPixelPosition = ViewportRect.xy + DispatchThreadId;
|
|
#endif
|
|
|
|
#if CONFIG_DOF_ALPHA
|
|
SceneColorOutput[OutputPixelPosition] = OutputFinalSceneColor;
|
|
#else
|
|
SceneColorOutput[OutputPixelPosition] = float4(OutputFinalSceneColor.rgb, 0);
|
|
#endif
|
|
|
|
|
|
#if DEBUG_OUTPUT
|
|
{
|
|
DebugOutput[OutputPixelPosition] = Debug;
|
|
}
|
|
#endif
|
|
}
|