Files
UnrealEngine/Engine/Shaders/Private/TemporalAA.usf
2025-05-18 13:04:45 +08:00

2416 lines
70 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#define EYE_ADAPTATION_LOOSE_PARAMETERS 1
#include "Common.ush"
#include "Random.ush"
#include "EyeAdaptationCommon.ush"
#include "TextureSampling.ush"
#include "MonteCarlo.ush"
#include "Quantization.ush"
//------------------------------------------------------- COMPILER CONFIG
// Promote vector truncation warnings (3206) to errors.
#pragma warning(error: 3206)
//------------------------------------------------------- ENUM VALUES
/** Payload of the history. History might still have additional TAA internals. */
// Only have RGB.
#define HISTORY_PAYLOAD_RGB 0
// Have RGB and translucency in alpha.
#define HISTORY_PAYLOAD_RGB_TRANSLUCENCY 1
// Have RGB and opacity in alpha. Deliberately an alias of HISTORY_PAYLOAD_RGB_TRANSLUCENCY (same value).
#define HISTORY_PAYLOAD_RGB_OPACITY (HISTORY_PAYLOAD_RGB_TRANSLUCENCY)
// Have RGB and DOF's CocRadius in alpha.
#define HISTORY_PAYLOAD_RGB_COC 2
// Have RGB, opacity in alpha and DOF's CocRadius in the red channel of a separate render target.
#define HISTORY_PAYLOAD_RGB_OPACITY_COC 3
/** Caching method for scene color. */
// Disable any in code cache.
#define AA_SAMPLE_CACHE_METHOD_DISABLE 0
// Caches 3x3 neighborhood into VGPR (although corners may be optimised away).
#define AA_SAMPLE_CACHE_METHOD_VGPR_3X3 1
// Prefetches scene color into 10x10 LDS tile (8x8 when screen percentage < 71%).
#define AA_SAMPLE_CACHE_METHOD_LDS 2
/** Clamping method for scene color. */
// Min max of neighboring samples.
#define HISTORY_CLAMPING_BOX_MIN_MAX 0
// Variance computed from neighboring samples.
#define HISTORY_CLAMPING_BOX_VARIANCE 1
// Min max samples that are within distance from output pixel.
#define HISTORY_CLAMPING_BOX_SAMPLE_DISTANCE 2
/** Qualities */
// Note: the numeric values are identifiers, not an ordering (MEDIUM_HIGH is 3, HIGH is 2).
#define TAA_QUALITY_LOW 0
#define TAA_QUALITY_MEDIUM 1
#define TAA_QUALITY_HIGH 2
#define TAA_QUALITY_MEDIUM_HIGH 3
//------------------------------------------------------- CONFIGS
// Compute shaders do responsive TAA in a single pass whenever SHADING_PATH_DEFERRED is set.
// Non-compute shaders leave this undefined here and pick up the disabled default (0) further down.
#if COMPUTESHADER
#define AA_SINGLE_PASS_RESPONSIVE SHADING_PATH_DEFERRED // The StencilTexture is not available on the mobile platform.
#endif
// Per-pass configuration. Each branch defines AA_HISTORY_PAYLOAD (mandatory) plus the AA_* switches
// it wants to override; anything left undefined falls back to the defaults declared after this chain.
//
// Pass 0: main scene color TAA, no upsampling.
#if TAA_PASS_CONFIG == 0 // Main
// Alpha holds translucency only when the alpha channel is enabled; otherwise the history is RGB only.
#if TAA_ALPHA_CHANNEL
#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_TRANSLUCENCY)
#else
#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB)
#endif
#define AA_BICUBIC 1
#define AA_CROSS 2
#define AA_DYNAMIC 1
#define AA_MANUALLY_CLAMP_HISTORY_UV 1
#define AA_TONE 1
#define AA_YCOCG 1
// Quality tiers: low drops input filtering; high and medium-high add dynamic anti-ghosting.
#if TAA_QUALITY == TAA_QUALITY_LOW
#define AA_FILTERED 0
#define AA_DYNAMIC_ANTIGHOST 0
#elif TAA_QUALITY == TAA_QUALITY_MEDIUM
#define AA_FILTERED 1
#define AA_DYNAMIC_ANTIGHOST 0
#elif TAA_QUALITY == TAA_QUALITY_HIGH
#define AA_FILTERED 1
#define AA_DYNAMIC_ANTIGHOST 1
#elif TAA_QUALITY == TAA_QUALITY_MEDIUM_HIGH
#define AA_FILTERED 1
#define AA_DYNAMIC_ANTIGHOST 1
#else
#error Unknown TAA quality
#endif
// Compute path: cache scene color in LDS, except on mobile where caching is disabled.
#if COMPUTESHADER
#if AA_MOBILE_CONFIG
#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_DISABLE) // the shared memory is not as efficient as expected on mobile devices, try not to use it on mobile devices.
#else
#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_LDS)
#endif
#endif
// Pass 7: hair. Fixed blend of 1/3 (AA_LERP), unfiltered input, RGB working space.
#elif TAA_PASS_CONFIG == 7 // Used for Hair
#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_OPACITY)
#define AA_DYNAMIC 1
#define AA_FILTERED 0
#define AA_LERP 3
#define AA_MANUALLY_CLAMP_HISTORY_UV 1
#define AA_YCOCG 0
// Pass 3: screen space reflections. Fixed blend of 1/8 (AA_LERP).
#elif TAA_PASS_CONFIG == 3 // Used for SSR
#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_OPACITY)
#define AA_DYNAMIC 1
#define AA_FILTERED 1
#define AA_LERP 8
#define AA_MANUALLY_CLAMP_HISTORY_UV 1
#define AA_YCOCG 1
// Pass 4: light shafts. Fixed blend of 1/64 (AA_LERP), lower resolution; AA_DYNAMIC is left at its default (0).
#elif TAA_PASS_CONFIG == 4 // Used for LightShaft
#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_OPACITY)
#define AA_FILTERED 1
#define AA_LERP 64
#define AA_MANUALLY_CLAMP_HISTORY_UV 1
#define AA_YCOCG 1
#define AA_LOWER_RESOLUTION 1
// Passes 1 and 2: main pass with temporal upsampling enabled (AA_UPSAMPLE).
#elif TAA_PASS_CONFIG == 1 || TAA_PASS_CONFIG == 2 // MainUpsampling & MainSuperSampling
#if TAA_ALPHA_CHANNEL
#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_TRANSLUCENCY)
#else
#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB)
#endif
#define AA_BICUBIC 1
#define AA_CROSS 1
#define AA_DYNAMIC 1
#define AA_MANUALLY_CLAMP_HISTORY_UV 1
#define AA_TONE 1
// The YCoCg working space is turned off for the Switch profiles.
#if SWITCH_PROFILE || SWITCH_PROFILE_FORWARD
#define AA_YCOCG 0
#else
#define AA_YCOCG 1
#endif
#define AA_UPSAMPLE 1
#define AA_UPSAMPLE_ADAPTIVE_FILTERING 1
// Quality tiers: 5 samples on mobile and 6 elsewhere, except high quality which uses 9 samples
// together with the sample-distance clamping box.
#if TAA_QUALITY == TAA_QUALITY_LOW
#define AA_FILTERED 0
#if AA_MOBILE_CONFIG
#define AA_SAMPLES 5
#else
#define AA_SAMPLES 6
#endif
#elif TAA_QUALITY == TAA_QUALITY_MEDIUM
#define AA_FILTERED 1
#if AA_MOBILE_CONFIG
#define AA_SAMPLES 5
#else
#define AA_SAMPLES 6
#endif
#elif TAA_QUALITY == TAA_QUALITY_HIGH
#define AA_HISTORY_CLAMPING_BOX (HISTORY_CLAMPING_BOX_SAMPLE_DISTANCE)
#define AA_FILTERED 1
#define AA_DYNAMIC_ANTIGHOST 1
#define AA_SAMPLES 9
#elif TAA_QUALITY == TAA_QUALITY_MEDIUM_HIGH
#define AA_FILTERED 1
#if AA_MOBILE_CONFIG
#define AA_SAMPLES 5
#else
#define AA_SAMPLES 6
#endif
#define AA_DYNAMIC_ANTIGHOST 1
#else
#error Unknown TAA quality
#endif
#if COMPUTESHADER
// Do not use LDS caching for screen percentage > 100% or < 50%.
#if TAA_SCREEN_PERCENTAGE_RANGE == 2 || TAA_SCREEN_PERCENTAGE_RANGE == 3
#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_VGPR_3X3)
#else
#if AA_MOBILE_CONFIG
#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_DISABLE) // the shared memory is not as efficient as expected on mobile devices, try not to use it on mobile devices.
#else
#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_LDS)
#endif
#endif
#endif
// Passes 5 and 6: diaphragm DOF pre-filtering. History carries the CoC radius; pass 6 also upsamples.
#elif TAA_PASS_CONFIG == 5 || TAA_PASS_CONFIG == 6 // Used for diaphragm DOF pre-filtering.
#if TAA_ALPHA_CHANNEL
#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_OPACITY_COC)
#else
#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_COC)
#endif
#define AA_CROSS 4 // because running at half res.
#define AA_DYNAMIC 1
#define AA_FORCE_ALPHA_CLAMP 1
#define AA_MANUALLY_CLAMP_HISTORY_UV 1
#define AA_LOWER_RESOLUTION 1
#if TAA_PASS_CONFIG == 6
#define AA_UPSAMPLE 1
#define AA_UPSAMPLE_ADAPTIVE_FILTERING 1
#else
#define AA_UPSAMPLE 0
#endif
// NOTE(review): there is no TAA_QUALITY_LOW branch here, so a low quality permutation of these passes
// reaches the #error below. Presumably that permutation is never compiled -- confirm on the C++ side.
// Medium and medium-high only enable bicubic history and input filtering when upsampling.
#if TAA_QUALITY == TAA_QUALITY_MEDIUM
#if AA_UPSAMPLE
#define AA_BICUBIC 1
#define AA_FILTERED 1
#endif
#elif TAA_QUALITY == TAA_QUALITY_HIGH
#define AA_BICUBIC 1
#define AA_FILTERED 1
#define AA_YCOCG 1
#elif TAA_QUALITY == TAA_QUALITY_MEDIUM_HIGH
#if AA_UPSAMPLE
#define AA_BICUBIC 1
#define AA_FILTERED 1
#endif
#else
#error Unknown TAA quality
#endif
// Intentionally empty: the LDS cache is not enabled for this pass yet (see TODO).
#if TAA_SCREEN_PERCENTAGE_RANGE != 2 && AA_UPSAMPLE
//#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_LDS) // TODO: doesn't work yet.
#endif
#else
#error Unknown TAA pass config. Have you changed ETAAPassConfig without updating me?
#endif
// AA_DOWNSAMPLE is 1 only for screen percentage range 2, which the LDS configuration
// further down labels as "screen percentage > 100%"; it is 0 for every other range.
#if TAA_SCREEN_PERCENTAGE_RANGE == 2
#define AA_DOWNSAMPLE 1
#else
#define AA_DOWNSAMPLE 0
#endif
//------------------------------------------------------- CONFIG DISABLED DEFAULTS
// Num samples of current frame
#ifndef AA_SAMPLES
#define AA_SAMPLES 5
#endif
// 1 = Use tighter AABB clamp for history.
// 0 = Use simple min/max clamp.
#ifndef AA_CLIP
#define AA_CLIP 0
#endif
// Cross distance in pixels used in depth search X pattern.
// 0 = Turn this feature off.
// 2 = Is required for standard temporal AA pass.
#ifndef AA_CROSS
#define AA_CROSS 0
#endif
// 1 = Use dynamic motion.
// 0 = Skip dynamic motion, currently required for half resolution passes.
#ifndef AA_DYNAMIC
#define AA_DYNAMIC 0
#endif
// 0 = Dynamic motion based lerp value (default).
// non-zero = Use 1/LERP fixed lerp value (used for reflections).
#ifndef AA_LERP
#define AA_LERP 0
#endif
// 1 = Use higher quality round clamp.
// 0 = Use lower quality but faster box clamp.
#ifndef AA_ROUND
#define AA_ROUND 0
#endif
// Force clamp on alpha.
#ifndef AA_FORCE_ALPHA_CLAMP
#define AA_FORCE_ALPHA_CLAMP 0
#endif
// Use YCoCg path.
#ifndef AA_YCOCG
#define AA_YCOCG 0
#endif
// Bicubic filter history
#ifndef AA_BICUBIC
#define AA_BICUBIC 0
#endif
// Tone map to kill fireflies
#ifndef AA_TONE
#define AA_TONE 0
#endif
// Antighosting using dynamic mask
#ifndef AA_DYNAMIC_ANTIGHOST
#define AA_DYNAMIC_ANTIGHOST 0
#endif
// Sample the stencil buffer inline rather than multiple masked passes.
#ifndef AA_SINGLE_PASS_RESPONSIVE
#define AA_SINGLE_PASS_RESPONSIVE 0
#endif
// Upsample the output.
#ifndef AA_UPSAMPLE
#define AA_UPSAMPLE 0
#endif
// Method used for generating the history clamping box.
#ifndef AA_HISTORY_CLAMPING_BOX
#define AA_HISTORY_CLAMPING_BOX (HISTORY_CLAMPING_BOX_MIN_MAX)
#endif
// Change the upsampling filter size when history is rejected that reduce blocky output pixels.
#ifndef AA_UPSAMPLE_ADAPTIVE_FILTERING
#define AA_UPSAMPLE_ADAPTIVE_FILTERING 0
#endif
// Whether this pass run at lower resolution than main view rectangle.
#ifndef AA_LOWER_RESOLUTION
#define AA_LOWER_RESOLUTION 0
#endif
// Whether the history buffer UV should be manually clamped.
#ifndef AA_MANUALLY_CLAMP_HISTORY_UV
#define AA_MANUALLY_CLAMP_HISTORY_UV 0
#endif
//------------------------------------------------------- CONFIG ENABLED DEFAULTS
// Always enable scene color filtering
// 1 = Use filtered sample.
// 0 = Use center sample.
#ifndef AA_FILTERED
#define AA_FILTERED 1
#endif
// Always enable AA_NAN in every TAA pass: this is more convenient than guarding each of the many potential divisions by zero.
// 0 = Don't use.
// 1 = Use extra clamp to avoid NANs
#ifndef AA_NAN
#define AA_NAN 1
#endif
// Neighborhood clamping. Disable only for testing reprojection. Enabled by default because TAA is totally broken otherwise.
#ifndef AA_CLAMP
#define AA_CLAMP 1
#endif
// By default, cache neighboring samples into VGPR in compute shaders, and disable caching otherwise.
#ifndef AA_SAMPLE_CACHE_METHOD
#if COMPUTESHADER
#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_VGPR_3X3)
#else
#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_DISABLE)
#endif
#endif
// By default, enable stochastic quantization of the output (the macro name keeps its historical spelling).
#ifndef AA_ENABLE_STOCASTIC_QUANTIZATION
#define AA_ENABLE_STOCASTIC_QUANTIZATION 1
#endif
//------------------------------------------------------- MANDATORY CONFIG
#ifndef AA_HISTORY_PAYLOAD
#error You forgot to defines the history payload.
#endif
//------------------------------------------------------- DERIVES
// Defines number of component in history payload.
#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB
#define HISTORY_PAYLOAD_COMPONENTS 3
#elif AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
#define HISTORY_PAYLOAD_COMPONENTS 5
#else
#define HISTORY_PAYLOAD_COMPONENTS 4
#endif
// Defines the number of render target to store TAA's history.
#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
#define HISTORY_RENDER_TARGETS 2
#else
#define HISTORY_RENDER_TARGETS 1
#endif
//------------------------------------------------------- CONFIG CHECKS
#if AA_SAMPLES != 9 && AA_SAMPLES != 5 && AA_SAMPLES != 6
#error Samples must be 5, (6 for TAAU) or 9
#endif
#if AA_SAMPLE_CACHE_METHOD >= 2 && !COMPUTESHADER
#error Group share only for compute shader.
#endif
//------------------------------------------------------- CONSTANTS
// Texel offsets of the 3x3 neighborhood around the nearest input pixel, in row-major order
// (index = 4 + x + 3 * y). Index 4 is the center pixel K itself.
//
// K = Center of the nearest input pixel.
// O = Center of the output pixel.
//
//          |           |
//     0    |     1     |    2
//          |           |
//          |           |
//  --------+-----------+--------
//          |           |
//          | O         |
//     3    |     K     |    5
//          |           |
//          |           |
//  --------+-----------+--------
//          |           |
//          |           |
//     6    |     7     |    8
//          |           |
//
static const int2 kOffsets3x3[9] =
{
int2(-1, -1),
int2( 0, -1),
int2( 1, -1),
int2(-1, 0),
int2( 0, 0), // K
int2( 1, 0),
int2(-1, 1),
int2( 0, 1),
int2( 1, 1),
};
// Indexes into kOffsets3x3 that cover the whole 3x3 square (identity mapping).
static const uint kSquareIndexes3x3[9] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
// Indexes into kOffsets3x3 that form a plus (+) shape: top, left, center, right, bottom.
static const uint kPlusIndexes3x3[5] = { 1, 3, 4, 5, 7 };
// Number of neighbors in the 3x3 neighborhood (matches the size of kOffsets3x3).
static const uint kNeighborsCount = 9;
#if AA_UPSAMPLE
// Texel offsets of the 2x2 quad whose top left texel is T, in row-major order (index = x + 2 * y).
//
// T = Center of the nearest top left input pixel.
// O = Center of the output pixel.
//
//          |
//     T    |    .
//          |
//       O  |
//  --------+--------
//          |
//          |
//     .    |    .
//          |
static const int2 Offsets2x2[4] =
{
int2( 0, 0), // T
int2( 1, 0),
int2( 0, 1),
int2( 1, 1),
};
// Indexes into Offsets2x2 that cover the whole 2x2 square (identity mapping).
static const uint SquareIndexes2x2[4] = { 0, 1, 2, 3 };
#endif // AA_UPSAMPLE
//------------------------------------------------------- PARAMETERS
// Shader parameters. Comments below only describe usages visible in this part of the file;
// several parameters are consumed further down.

// Scale that CorrectExposure() multiplies into its argument.
float HistoryPreExposureCorrection;
float CurrentFrameWeight;
int bCameraCut;
// NOTE(review): sizes 9 and 5 match kSquareIndexes3x3 and kPlusIndexes3x3 -- presumably one weight per sample; confirm.
DECLARE_SCALAR_ARRAY(float, SampleWeights, 9);
DECLARE_SCALAR_ARRAY(float, PlusWeights, 5);
float4 ViewportUVToInputBufferUV;
float4 MaxViewportUVAndSvPositionToViewportUV;
float2 ScreenPosAbsMax;
float4 ScreenPosToHistoryBufferUV;
// .xy converts a buffer UV into a texel coordinate; .zw converts a texel coordinate back into a UV.
float4 InputSceneColorSize;
// Inclusive clamp range applied to texel coordinates when loading input scene color.
int2 InputMinPixelCoord;
int2 InputMaxPixelCoord;
Texture2D InputSceneColor;
SamplerState InputSceneColorSampler;
// The red channel is read as the CoC radius when the history uses two render targets.
Texture2D InputSceneMetadata;
SamplerState InputSceneMetadataSampler;
Texture2D SceneDepthTexture;
SamplerState SceneDepthTextureSampler;
// Velocity is loaded by integer texel coordinate on GLSL ES 3.1 and sampled everywhere else.
#if COMPILER_GLSL_ES3_1
Texture2D<uint4> GBufferVelocityTextureSRV;
#else
Texture2D GBufferVelocityTexture;
SamplerState GBufferVelocityTextureSampler;
#endif
Texture2D<uint2> StencilTexture;
Texture2D HistoryBuffer_0;
Texture2D HistoryBuffer_1;
SamplerState HistoryBufferSampler_0;
SamplerState HistoryBufferSampler_1;
float4 HistoryBufferSize;
float4 HistoryBufferUVMinMax;
float CoCBilateralFilterStrength;
// .zw converts an output pixel position (relative to the viewport) into a viewport UV.
float4 OutputViewportSize;
// .xy is added to the group's pixel origin when locating the LDS tile without upsampling.
float4 OutputViewportRect;
float3 OutputQuantizationError;
#if COMPUTESHADER
RWTexture2D<float4> OutComputeTex_0;
#if TAA_DOWNSAMPLE
// This shader permutation outputs half resolution image in addition to main full-res one.
// It is more efficient than performing a separate downsampling pass afterwards.
RWTexture2D<float4> OutComputeTexDownsampled;
groupshared float4 GroupSharedDownsampleArray[THREADGROUP_SIZEX*THREADGROUP_SIZEY]; // TODO: share this with GroupSharedArrayF4 when possible
#endif // TAA_DOWNSAMPLE
// Second output, only present when the history payload needs two render targets.
#if HISTORY_RENDER_TARGETS == 2
RWTexture2D<float4> OutComputeTex_1;
#endif // HISTORY_RENDER_TARGETS == 2
#endif
// Temporal upsample specific params.
#if AA_UPSAMPLE
// Added to a view-relative input pixel coordinate to obtain a buffer pixel coordinate.
float2 InputViewMin;
// .xy converts a viewport UV into an input view pixel position.
float4 InputViewSize;
// Temporal jitter at the pixel scale.
float2 TemporalJitterPixels;
float ScreenPercentage;
float UpscaleFactor; // = 1 / ScreenPercentage
#endif // AA_UPSAMPLE
//------------------------------------------------------- FUNCTIONS
// Only available when the compiler supports HLSL 2021, since it relies on templates.
#if COMPILER_SUPPORTS_HLSL2021
// Multiplies X in place by HistoryPreExposureCorrection.
// T can be any type for which `X *= float` is valid.
template<typename T>
void CorrectExposure(inout T X)
{
X *= HistoryPreExposureCorrection;
}
#endif
// Fetches the encoded velocity at BufferUV, displaced by PixelOffset texels.
// The value is returned still encoded (ENCODED_VELOCITY_TYPE); decoding is left to the caller.
ENCODED_VELOCITY_TYPE SampleVelocityTexture(float2 BufferUV, int2 PixelOffset = int2(0, 0))
{
#if COMPILER_GLSL_ES3_1
	// Integer texture on this path: turn the UV into a texel coordinate and load it directly.
	const int2 TexelCoord = int2(BufferUV * InputSceneColorSize.xy) + PixelOffset;
	return GBufferVelocityTextureSRV.Load(int3(TexelCoord, 0));
#else
	// Sample mip 0 and let the sampler apply the texel offset.
	return GBufferVelocityTexture.SampleLevel(GBufferVelocityTextureSampler, BufferUV, 0, PixelOffset);
#endif
}
// Converts RGB to an unnormalized YCoCg triple.
// Every channel is left scaled by 4 (Y = R + 2G + B); YCoCgToRGB() removes that scale.
float3 RGBToYCoCg( float3 RGB )
{
	return float3(
		dot( RGB, float3(  1, 2,  1 ) ),	// Y
		dot( RGB, float3(  2, 0, -2 ) ),	// Co
		dot( RGB, float3( -1, 2, -1 ) ) );	// Cg
}
// Inverse of RGBToYCoCg(): removes the factor 4 from each channel, then recombines into RGB.
float3 YCoCgToRGB( float3 YCoCg )
{
	// x = Y, y = Co, z = Cg, each brought back to unit scale.
	const float3 Scaled = YCoCg * 0.25;

	return float3(
		Scaled.x + Scaled.y - Scaled.z,		// R
		Scaled.x + Scaled.z,				// G
		Scaled.x - Scaled.y - Scaled.z );	// B
}
// Cheap luma approximation: R + 2G + B.
// The result is four times a normalized luma; callers account for that scale.
float Luma4(float3 Color)
{
	const float TwiceGreen = Color.g * 2.0;
	const float RedPlusBlue = Color.r + Color.b;
	return TwiceGreen + RedPlusBlue;
}
// HDR weight of an RGB color: 1 / (Luma4(Color) * Exposure + 4).
// The constant 4 pairs with the factor 4 carried by Luma4().
float HdrWeight4(float3 Color, float Exposure)
{
	const float ExposedLuma4 = Luma4(Color) * Exposure;
	return rcp(ExposedLuma4 + 4.0);
}
// Same weighting as HdrWeight4(), for callers that already hold the luma term in a scalar
// (GetSceneColorHdrWeight() passes the Y channel of a YCoCg color).
float HdrWeightY(float Color, float Exposure)
{
	const float ExposedLuma4 = Color * Exposure;
	return rcp(ExposedLuma4 + 4.0);
}
// Intersects a ray with an origin-centered AABB, assuming an intersection exists.
// Dir = Ray direction.
// Org = Start of the ray.
// Box = Half extent of the box, which is centered at {0,0,0}.
// Returns the ray parameter at which the ray enters the box (largest of the per-axis entry parameters).
float IntersectAABB(float3 Dir, float3 Org, float3 Box)
{
#if PS4_PROFILE
	// This causes flicker, it should only be used on PS4 until proper fix is in.
	const float SmallestAbsDir = min(min(abs(Dir.x), abs(Dir.y)), abs(Dir.z));
	if (SmallestAbsDir < (1.0/65536.0))
	{
		return 1.0;
	}
#endif

	const float3 InvDir = rcp(Dir);

	// Ray parameters at the +Box and -Box planes of each axis.
	const float3 TAtPositiveFaces = (Box - Org) * InvDir;
	const float3 TAtNegativeFaces = ((-Box) - Org) * InvDir;

	// Per axis, the smaller parameter is where the slab is entered.
	const float3 TEnter = min(TAtPositiveFaces, TAtNegativeFaces);
	return max(max(TEnter.x, TEnter.y), TEnter.z);
}
// Casts a ray from History towards Filtered and returns the ray parameter at which it enters
// the box [NeighborMin, NeighborMax]. The result is not clamped; 0 is History and 1 is Filtered.
float HistoryClip(float3 History, float3 Filtered, float3 NeighborMin, float3 NeighborMax)
{
#if 0
	// Disabled variant: re-centers the box (extended to contain Filtered) and goes through IntersectAABB().
	float3 Min = min(Filtered, min(NeighborMin, NeighborMax));
	float3 Max = max(Filtered, max(NeighborMin, NeighborMax));
	float3 Avg2 = Max + Min;
	float3 Dir = Filtered - History;
	float3 Org = History - Avg2 * 0.5;
	float3 Scale = Max - Avg2 * 0.5;
	return saturate(IntersectAABB(Dir, Org, Scale));
#else
	// The box is used as given. Disabled alternative that also includes Filtered:
	//   min( Filtered, NeighborMin ) / max( Filtered, NeighborMax )

	// Components too close to zero are replaced by a small positive value so rcp() stays finite.
	float3 TowardsFiltered = Filtered - History;
	TowardsFiltered = select(abs( TowardsFiltered ) < (1.0/65536.0), (1.0/65536.0), TowardsFiltered);
	const float3 InvTowardsFiltered = rcp( TowardsFiltered );

	// Ray parameters at the min and max planes of each axis.
	const float3 TAtBoxMin = (NeighborMin - History) * InvTowardsFiltered;
	const float3 TAtBoxMax = (NeighborMax - History) * InvTowardsFiltered;

	// Per axis entry parameter; the box is entered once every axis has been entered.
	const float3 TEnter = min( TAtBoxMin, TAtBoxMax );
	return max3( TEnter.x, TEnter.y, TEnter.z );
#endif
}
// Returns the normalized factors (x for A, y for B) of a lerp by Blend between two values
// carrying weights WeightA and WeightB. The two factors sum to 1.
// No guard is applied if both weighted terms are zero.
float2 WeightedLerpFactors(float WeightA, float WeightB, float Blend)
{
	const float2 Unnormalized = float2((1.0 - Blend) * WeightA, Blend * WeightB);
	const float RcpTotal = rcp(Unnormalized.x + Unnormalized.y);
	return Unnormalized * RcpTotal;
}
#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC || AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
// Computes the bilateral weight according to two CoC radii.
// The weight is 1 when both radii match and falls linearly to 0 as they diverge; the distance
// is measured relative to |RefCocRadius| once that exceeds 1, and in absolute terms otherwise.
float ComputeBilateralWeight(float RefCocRadius, float SampleCocRadius)
{
	const float AbsRefCocRadius = abs(RefCocRadius);
	const float DistanceScale = (AbsRefCocRadius > 1 ? rcp(AbsRefCocRadius) : 1.0);
	const float CocDistance = abs(RefCocRadius - SampleCocRadius);
	return saturate(1 - CocDistance * DistanceScale);
}
// Bilateral weight of a neighbor sample relative to the center sample's CoC radius.
// Unlike ComputeBilateralWeight(), the radius difference is signed: a sample whose radius is
// greater than or equal to the center's always gets full weight, and only a smaller one is attenuated.
float ComputeNeightborSampleBilateralWeight(float CenterCocRadius, float SampleCocRadius)
{
	const float AbsCenterCocRadius = abs(CenterCocRadius);
	const float DistanceScale = (AbsCenterCocRadius > 1 ? rcp(AbsCenterCocRadius) : 1.0);
	const float SignedCocDelta = CenterCocRadius - SampleCocRadius;
	return saturate(1 - SignedCocDelta * DistanceScale);
}
#endif
//------------------------------------------------------- HISTORY's PAYLOAD
// Payload of the TAA's history.
// The *Payload() helpers below apply their operation to both members alike.
struct FTAAHistoryPayload
{
// Transformed scene color and alpha channel (in the working space chosen by AA_YCOCG).
float4 Color;
// Radius of the circle of confusion for DOF.
float CocRadius;
};
// Returns Payload with both Color and CocRadius multiplied by x.
FTAAHistoryPayload MulPayload(in FTAAHistoryPayload Payload, in float x)
{
	FTAAHistoryPayload Scaled;
	Scaled.Color = Payload.Color * x;
	Scaled.CocRadius = Payload.CocRadius * x;
	return Scaled;
}
// Returns the member-wise sum of the two payloads.
FTAAHistoryPayload AddPayload(in FTAAHistoryPayload Payload0, in FTAAHistoryPayload Payload1)
{
	FTAAHistoryPayload Sum;
	Sum.Color = Payload0.Color + Payload1.Color;
	Sum.CocRadius = Payload0.CocRadius + Payload1.CocRadius;
	return Sum;
}
// Returns the member-wise (and, for Color, component-wise) minimum of the two payloads.
FTAAHistoryPayload MinPayload(in FTAAHistoryPayload Payload0, in FTAAHistoryPayload Payload1)
{
	FTAAHistoryPayload Lowest;
	Lowest.Color = min(Payload0.Color, Payload1.Color);
	Lowest.CocRadius = min(Payload0.CocRadius, Payload1.CocRadius);
	return Lowest;
}
// Returns the member-wise (and, for Color, component-wise) maximum of the two payloads.
FTAAHistoryPayload MaxPayload(in FTAAHistoryPayload Payload0, in FTAAHistoryPayload Payload1)
{
	FTAAHistoryPayload Highest;
	Highest.Color = max(Payload0.Color, Payload1.Color);
	Highest.CocRadius = max(Payload0.CocRadius, Payload1.CocRadius);
	return Highest;
}
// Three-operand version of MinPayload(), using min3() on each member.
FTAAHistoryPayload MinPayload3(in FTAAHistoryPayload Payload0, in FTAAHistoryPayload Payload1, in FTAAHistoryPayload Payload2)
{
	FTAAHistoryPayload Lowest;
	Lowest.Color = min3(Payload0.Color, Payload1.Color, Payload2.Color);
	Lowest.CocRadius = min3(Payload0.CocRadius, Payload1.CocRadius, Payload2.CocRadius);
	return Lowest;
}
// Three-operand version of MaxPayload(), using max3() on each member.
FTAAHistoryPayload MaxPayload3(in FTAAHistoryPayload Payload0, in FTAAHistoryPayload Payload1, in FTAAHistoryPayload Payload2)
{
	FTAAHistoryPayload Highest;
	Highest.Color = max3(Payload0.Color, Payload1.Color, Payload2.Color);
	Highest.CocRadius = max3(Payload0.CocRadius, Payload1.CocRadius, Payload2.CocRadius);
	return Highest;
}
//------------------------------------------------------- TAA INTERMEDIARY STRUCTURES
// Output pixel parameters. Should not be modified once set up.
// The exception is the neighbor cache at the bottom, which PrecacheInputSceneColor() fills in.
struct FTAAInputParameters
{
// Compute shader dispatch params, set to 0 in pixel shader.
uint2 GroupId;
uint2 GroupThreadId;
uint GroupThreadIndex;
// Viewport UV of the output pixel.
float2 ViewportUV;
// Position of the output pixel on screen.
float2 ScreenPos;
// Buffer UV of the nearest input pixel.
float2 NearestBufferUV;
#if AA_UPSAMPLE
// Buffer UV of the nearest top left input pixel.
float2 NearestTopLeftBufferUV;
#endif
// Whether this pixel should be responsive (stored as a float).
float bIsResponsiveAAPixel;
// Frame exposure's scale, used as the exposure term of the HDR weight.
float FrameExposureScale;
// Cache of neighbors' transformed scene color, indexed like kOffsets3x3.
// Only present with the VGPR 3x3 cache method.
#if AA_SAMPLE_CACHE_METHOD == AA_SAMPLE_CACHE_METHOD_VGPR_3X3
float4 CachedNeighbors0[kNeighborsCount];
// Red channel of InputSceneMetadata for each neighbor, read back as the CoC radius.
#if HISTORY_RENDER_TARGETS == 2
float CachedNeighbors1[kNeighborsCount];
#endif
#endif
};
// Intermediary results that the major functions share with each other.
//
// It is allowed to pass this down to a major function while some members are still uninitialized.
// Use CreateIntermediaryResult() to obtain an instance.
struct FTAAIntermediaryResult
{
// The filtered input.
FTAAHistoryPayload Filtered;
// Temporal weight of the filtered input.
float FilteredTemporalWeight;
// 1 / filtering kernel scale factor for AA_UPSAMPLE_ADAPTIVE_FILTERING.
float InvFilterScaleFactor;
};
// Create intermediary result.
// Every member is first set to 1.0 / 0.0 (presumably so that reading a member nobody filled in
// stands out), then the two scalar members are reset to their neutral value of 1.
FTAAIntermediaryResult CreateIntermediaryResult()
{
// Disable warning X4008: floating point division by zero
#pragma warning(disable:4008)
// The division by zero is intentional; the scalar is cast onto every member of the struct.
FTAAIntermediaryResult IntermediaryResult = (FTAAIntermediaryResult) (1.0 / 0.0);
#pragma warning(default:4008)
IntermediaryResult.FilteredTemporalWeight = 1;
IntermediaryResult.InvFilterScaleFactor = 1;
return IntermediaryResult;
}
// Transformed scene color's data for a sample.
struct FTAASceneColorSample
{
// Transformed scene color and alpha channel.
float4 Color;
// Radius of the circle of confusion for DOF (0 when the history payload carries no CoC).
float CocRadius;
// HDR weight of the scene color sample, as returned by GetSceneColorHdrWeight().
float HdrWeight;
};
//------------------------------------------------------- SCENE COLOR SPACE MANAGEMENT
// Transform RAW linear scene color RGB to TAA's working color space.
// With AA_YCOCG the RGB part becomes YCoCg; alpha is passed through untouched either way.
float4 TransformSceneColor(float4 RawLinearSceneColorRGBA)
{
	float4 WorkingColor = RawLinearSceneColorRGBA;
#if AA_YCOCG
	WorkingColor.rgb = RGBToYCoCg(RawLinearSceneColorRGBA.rgb);
#endif
	return WorkingColor;
}
// Inverse of TransformSceneColor(): brings a working space color back to RAW linear RGB.
// Alpha is passed through untouched.
float4 TransformBackToRawLinearSceneColor(float4 SceneColor)
{
	float4 RawLinearColor = SceneColor;
#if AA_YCOCG
	RawLinearColor.xyz = YCoCgToRGB(SceneColor.xyz);
#endif
	return RawLinearColor;
}
// Transform current frame's RAW scene color RGB to TAA's working color space.
// Currently a plain forward to TransformSceneColor().
float4 TransformCurrentFrameSceneColor(float4 RawSceneColorRGBA)
{
	const float4 WorkingColor = TransformSceneColor(RawSceneColorRGBA);
	return WorkingColor;
}
// Returns the Luma4 (luma scaled by 4) of a color expressed in TAA's working color space.
float GetSceneColorLuma4(float4 SceneColor)
{
#if AA_YCOCG
	// The Y channel produced by RGBToYCoCg() is R + 2G + B, the same quantity Luma4() computes.
	const float Luma = SceneColor.x;
#else
	const float Luma = Luma4(SceneColor.rgb);
#endif
	return Luma;
}
// Returns the HDR weight of a color expressed in TAA's working color space,
// using the frame exposure scale stored in InputParams.
float GetSceneColorHdrWeight(
	in FTAAInputParameters InputParams,
	float4 SceneColor)
{
	const float Exposure = InputParams.FrameExposureScale;
#if AA_YCOCG
	// Y already holds the luma term, so the scalar variant is enough.
	return HdrWeightY(SceneColor.x, Exposure);
#else
	return HdrWeight4(SceneColor.rgb, Exposure);
#endif
}
//------------------------------------------------------- INPUT SAMPLE CACHING.
// API to sample input scene color and depth through caching system.
//
// Precache scene color or depth:
// PrecacheInputSceneColor(InputParams);
// PrecacheInputSceneDepth(InputParams);
//
// Then sample scene color or depth:
// SampleCachedSceneColorTexture(InputParams, /* Offset = */ int2(-1, -1));
// SampleCachedSceneDepthTexture(InputParams, /* Offset = */ int2(-1, -1));
//
// <Offset> parameter is meant to be compile time constant of the pixel offset from nearest input sample.
#if AA_SAMPLE_CACHE_METHOD == AA_SAMPLE_CACHE_METHOD_VGPR_3X3
//------------------------------------------------------- 3x3 NEIGHTBORS CACHING
#define AA_PRECACHE_SCENE_COLOR 1
// Fills FTAAInputParameters::CachedNeighbors* with the 3x3 neighborhood of the nearest input pixel.
// Scene color is stored already transformed to the working color space, and texel coordinates
// are clamped to [InputMinPixelCoord, InputMaxPixelCoord].
void PrecacheInputSceneColor(inout FTAAInputParameters InputParams)
{
	// Texel coordinate of the nearest input pixel, shared by all nine neighbors.
	const int2 CenterCoord = int2(InputParams.NearestBufferUV * InputSceneColorSize.xy);

	UNROLL
	for (uint NeighborId = 0; NeighborId < kNeighborsCount; NeighborId++)
	{
		const int2 TexelCoord = clamp(CenterCoord + kOffsets3x3[NeighborId], InputMinPixelCoord, InputMaxPixelCoord);

		InputParams.CachedNeighbors0[NeighborId] = TransformCurrentFrameSceneColor(InputSceneColor[TexelCoord]);
#if HISTORY_RENDER_TARGETS == 2
		InputParams.CachedNeighbors1[NeighborId] = InputSceneMetadata[TexelCoord].r;
#endif
	}
}
// Reads a scene color sample out of the 3x3 VGPR cache filled by PrecacheInputSceneColor().
// PixelOffset is the offset from the nearest input pixel, each component in [-1, 1].
FTAASceneColorSample SampleCachedSceneColorTexture(
	inout FTAAInputParameters InputParams,
	int2 PixelOffset)
{
	// PixelOffset is a compile time constant, so the index computation costs nothing at runtime.
	// Row-major index into the cache, with 4 being the center.
	const uint CacheIndex = uint(4 + PixelOffset.x + PixelOffset.y * 3);

	FTAASceneColorSample CachedSample;
	CachedSample.Color = InputParams.CachedNeighbors0[CacheIndex];

	// The CoC radius lives in alpha, in the second cache array, or nowhere, depending on the payload.
#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC
	CachedSample.CocRadius = CachedSample.Color.a;
#elif AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
	CachedSample.CocRadius = InputParams.CachedNeighbors1[CacheIndex];
#else
	CachedSample.CocRadius = 0;
#endif

	CachedSample.HdrWeight = GetSceneColorHdrWeight(InputParams, CachedSample.Color);
	return CachedSample;
}
#elif AA_SAMPLE_CACHE_METHOD == AA_SAMPLE_CACHE_METHOD_LDS
//-------------------------------- Generic LDS functions.
// Returns SV_GroupIndex, i.e. the linear index of the thread within its group.
uint GetGroupIndex(in FTAAInputParameters InputParams)
{
#if 0
	// Group thread index doesn't actually suppress ALU instruction on consoles.
	return InputParams.GroupThreadIndex;
#else
	// Recompute it from the 2D thread id instead (row-major within the group).
	const uint RowStart = InputParams.GroupThreadId.y * THREADGROUP_SIZEX;
	return InputParams.GroupThreadId.x + RowStart;
#endif
}
//------------------------------------------------------- COMPUTE SHADER GROUP SHARE OPTIMIZATION
#if THREADGROUP_SIZEX != THREADGROUP_SIZEY
#error AA_SAMPLE_CACHE_METHOD >= 2 assume square shaped tiles.
#endif
#ifndef TAA_SCREEN_PERCENTAGE_RANGE
#error LDS cache needs to know the screen percentage range.
#endif
#define AA_PRECACHE_IMPLEMENTATIONS 1
// Total number of thread per group.
#define THREADGROUP_TOTAL (THREADGROUP_SIZEX * THREADGROUP_SIZEY)
// Configure base width of the LDS tile according to screen percentage range.
// If screen percentage is lower than 75%, we can set the base tile width to 6, which subsequently gives a scene color
// tile width of 8 that can be cached with only one iteration per lane in PrecacheInputSceneColorToLDS(), and reduces the LDS size to 1k.
#if TAA_SCREEN_PERCENTAGE_RANGE == 1 // screen percentage < 75%
#define LDS_BASE_TILE_WIDTH 6
#elif TAA_SCREEN_PERCENTAGE_RANGE == 0 // screen percentage in [75%; 100%]
#define LDS_BASE_TILE_WIDTH THREADGROUP_SIZEX
#elif TAA_SCREEN_PERCENTAGE_RANGE == 2 // screen percentage > 100%
#error Should not use LDS caching.
#else
#error Unknown screen percentage range.
#endif
//-------------------------------- Configuration.
// Configuration of what should be prefetched.
// 1: use Load; 2: use gather4.
#if !AA_UPSAMPLE
// Scene depth caching is disabled for TAA upsample because the extra screen percentage ALU makes things worse.
#define AA_PRECACHE_SCENE_DEPTH 2
#endif
// 1: use load.
#define AA_PRECACHE_SCENE_COLOR 1
// Precache GetSceneColorHdrWeight() into scene color's alpha channel.
#define AA_PRECACHE_SCENE_HDR_WEIGHT (AA_TONE && HISTORY_PAYLOAD_COMPONENTS == 3)
// Layout of scene color in LDS.
// 0: AoS
// 1: SoA
// 2: AoSoA (SoA scene color, AoS GetSceneColorHdrWeight())
#define LDS_COLOR_LAYOUT 0
//-------------------------------- Depth tile constants.
// Number of texels around the group tile for depth.
#define LDS_DEPTH_TILE_BORDER_SIZE (AA_CROSS)
// Width in texels of the depth tile cached into LDS.
#define LDS_DEPTH_TILE_WIDTH (LDS_BASE_TILE_WIDTH + 2 * LDS_DEPTH_TILE_BORDER_SIZE)
// Total number of texels cached in the depth tile.
#define LDS_DEPTH_ARRAY_SIZE (LDS_DEPTH_TILE_WIDTH * LDS_DEPTH_TILE_WIDTH)
//-------------------------------- Scene color tile constants.
// TODO: shader permutation for screen percentage <= 75% with AA_UPSAMPLE to fit in 1k LDS.
// Number of scene color component that gets cached.
#if HISTORY_PAYLOAD_COMPONENTS == 4 || AA_PRECACHE_SCENE_HDR_WEIGHT
#define LDS_COLOR_COMPONENT_COUNT 4
#else
#define LDS_COLOR_COMPONENT_COUNT 3
#endif
// Number of texels around the group tile for scene color.
#define LDS_COLOR_TILE_BORDER_SIZE (1)
// Width in texels of the scene color tile cached into LDS.
#define LDS_COLOR_TILE_WIDTH (LDS_BASE_TILE_WIDTH + 2 * LDS_COLOR_TILE_BORDER_SIZE)
// Total number of texels cached in the scene color tile.
#define LDS_COLOR_ARRAY_SIZE (LDS_COLOR_TILE_WIDTH * LDS_COLOR_TILE_WIDTH)
//-------------------------------- Group shared global.
// Size of the LDS to be allocated.
#define LDS_ARRAY_SIZE (LDS_COLOR_ARRAY_SIZE * LDS_COLOR_COMPONENT_COUNT)
#if LDS_ARRAY_SIZE < LDS_DEPTH_ARRAY_SIZE
#error LDS_ARRAY_SIZE assumed scene color caching is bigger than scene depth caching.
#endif
// Some compilers may have issues optimising LDS store instructions, therefore we give the compiler a hint by using a float4 LDS.
#if defined(AA_PRECACHE_SCENE_DEPTH)
#define LDS_USE_FLOAT4_ARRAY 0
#else
#define LDS_USE_FLOAT4_ARRAY (LDS_COLOR_COMPONENT_COUNT == 4 && LDS_COLOR_LAYOUT == 0)
#endif
#if LDS_USE_FLOAT4_ARRAY
groupshared float4 GroupSharedArrayF4[LDS_ARRAY_SIZE/4];
#else
groupshared float GroupSharedArray[LDS_ARRAY_SIZE];
#endif
//-------------------------------- Generic LDS tile functions.
#if AA_UPSAMPLE
// Get the pixel coordinate (in the input buffer) of the center of the nearest input pixel K
// for the group's thread 0.
float2 GetGroupThread0InputPixelCoord(in FTAAInputParameters InputParams)
{
	// Center of the output pixel handled by thread 0, relative to the top left corner of the viewport.
	const float2 OutputPixelCenter = InputParams.GroupId * uint2(THREADGROUP_SIZEX, THREADGROUP_SIZEY) + 0.5;

	// The same position as a viewport UV, then as a jittered pixel position inside the input view.
	const float2 InputViewPixelPos = (OutputPixelCenter * OutputViewportSize.zw) * InputViewSize.xy + TemporalJitterPixels;

	// Snap to the center of the input pixel containing it, then move from view space to buffer space.
	return InputViewMin.xy + (floor(InputViewPixelPos) + 0.5);
}
#endif
// Get the texel coordinate of the top left corner of this group's LDS tile, border included.
// TileBorderSize is the number of extra texels cached on each side of the group's own texels.
uint2 GetGroupTileTexelOffset(in FTAAInputParameters InputParams, uint TileBorderSize)
{
#if AA_UPSAMPLE
	// The tile is anchored on the nearest input pixel K of thread 0.
	const float2 GroupOriginTexel = floor(GetGroupThread0InputPixelCoord(InputParams));
	return uint2(GroupOriginTexel - TileBorderSize);
#else // !AA_UPSAMPLE
	// Without upsampling, output and input pixels coincide, so the group's first pixel is the anchor.
	return OutputViewportRect.xy + InputParams.GroupId * uint2(THREADGROUP_SIZEX, THREADGROUP_SIZEY) - TileBorderSize;
#endif
}
// Get the linear index, within an LDS tile, of the texel located PixelOffset away from this
// thread's nearest input pixel. The tile is row-major and (LDS_BASE_TILE_WIDTH + 2 * TileBorderSize) wide.
uint GetTileArrayIndexFromPixelOffset(in FTAAInputParameters InputParams, int2 PixelOffset, uint TileBorderSize)
{
#if AA_UPSAMPLE
	// (1, tile width): dotting a 2D tile position with this yields its linear index.
	const float2 IndexStride = float2(1, TileBorderSize * 2 + LDS_BASE_TILE_WIDTH);

	// Position of this thread's nearest input pixel relative to the one of the group's thread 0.
	const float2 GroupOriginTexel = floor(GetGroupThread0InputPixelCoord(InputParams));
	const float2 NearestTexel = floor(InputParams.NearestBufferUV * InputSceneColorSize.xy);
	const float2 PosFromGroupOrigin = NearestTexel - GroupOriginTexel;

	// Requested offset, shifted by the border so that it is relative to the tile's top left corner.
	const float2 OffsetFromTileCorner = float2(PixelOffset) + float(TileBorderSize);

	return uint(dot(PosFromGroupOrigin, IndexStride) + dot(OffsetFromTileCorner, IndexStride));
#else
	const uint TileWidth = TileBorderSize * 2 + LDS_BASE_TILE_WIDTH;
	const uint2 PosInTile = InputParams.GroupThreadId + uint2(PixelOffset + TileBorderSize);
	return PosInTile.x + PosInTile.y * TileWidth;
#endif
}
//-------------------------------- Share depth texture fetches.
#if defined(AA_PRECACHE_SCENE_DEPTH)
// Precache input scene depth into LDS.
//
// Fills the first LDS_DEPTH_ARRAY_SIZE entries of GroupSharedArray with the depth tile of this group
// (LDS_DEPTH_TILE_WIDTH texels wide, row-major). The work is spread over all threads of the group,
// each thread looping LoadCount times with a stride of THREADGROUP_TOTAL.
// This function issues no barrier itself; NOTE(review): callers presumably synchronize the group
// before reading the tile back -- confirm at the call site.
void PrecacheInputSceneDepthToLDS(in FTAAInputParameters InputParams)
{
// Texel coordinate of the tile's top left corner, depth border included.
uint2 GroupTexelOffset = GetGroupTileTexelOffset(InputParams, LDS_DEPTH_TILE_BORDER_SIZE);
#if AA_PRECACHE_SCENE_DEPTH == 1
// Prefetch depth buffer using Load.
{
// Number of texels each thread has to load, rounded up.
const uint LoadCount = (LDS_DEPTH_ARRAY_SIZE + THREADGROUP_TOTAL - 1) / THREADGROUP_TOTAL;
uint LinearGroupThreadId = GetGroupIndex(InputParams);
UNROLL
for (uint i = 0; i < LoadCount; i++)
{
// Linear tile index -> 2D texel position.
uint2 TexelLocation = GroupTexelOffset + uint2(
LinearGroupThreadId % LDS_DEPTH_TILE_WIDTH,
LinearGroupThreadId / LDS_DEPTH_TILE_WIDTH);
// Only the last iteration can run past the end of the tile, and only when the tile size is not
// a multiple of the group size. The last two terms are compile time constants once unrolled,
// leaving the runtime bound check on that single iteration.
if ((LinearGroupThreadId < LDS_DEPTH_ARRAY_SIZE) ||
(i != LoadCount - 1) ||
(LDS_DEPTH_ARRAY_SIZE % THREADGROUP_TOTAL) == 0)
{
GroupSharedArray[LinearGroupThreadId] = SceneDepthTexture.Load(uint3(TexelLocation, 0)).x;
}
LinearGroupThreadId += THREADGROUP_TOTAL;
}
}
#elif AA_PRECACHE_SCENE_DEPTH == 2
// Prefetch depth buffer using Gather.
{
// Each gather covers a 2x2 quad, hence a quarter of the texel count, rounded up over the group.
const uint LoadCount = (LDS_DEPTH_ARRAY_SIZE / 4 + THREADGROUP_TOTAL - 1) / THREADGROUP_TOTAL;
uint LinearGroupThreadId = GetGroupIndex(InputParams);
UNROLL
for (uint i = 0; i < LoadCount; i++)
{
// Top left texel of the quad handled in this iteration: quads advance two texels at a time
// along a row and wrap to the next pair of rows.
// NOTE(review): the wrap assumes LDS_DEPTH_TILE_WIDTH is even -- confirm for every configuration.
uint2 TileDest = uint2(
(2 * LinearGroupThreadId) % LDS_DEPTH_TILE_WIDTH,
2 * ((2 * LinearGroupThreadId) / LDS_DEPTH_TILE_WIDTH));
uint2 TexelLocation = GroupTexelOffset + TileDest;
// Linear LDS index of the quad's top left texel.
uint DestI = TileDest.x + TileDest.y * LDS_DEPTH_TILE_WIDTH;
// Same guard as in the Load path, expressed in quads.
if ((DestI < LDS_DEPTH_ARRAY_SIZE) ||
(i != LoadCount - 1) ||
((LDS_DEPTH_ARRAY_SIZE / 4) % THREADGROUP_TOTAL) == 0)
{
// NOTE(review): the UV is built from InputSceneColorSize.zw, which assumes the depth texture
// has the same extent as the input scene color -- confirm.
float2 UV = float2(TexelLocation + 0.5) * InputSceneColorSize.zw;
float4 Depth = SceneDepthTexture.Gather(SceneDepthTextureSampler, UV);
// Scatter the quad: .w -> (row 0, col 0), .z -> (row 0, col 1), .x -> (row 1, col 0), .y -> (row 1, col 1).
GroupSharedArray[DestI + 1 * LDS_DEPTH_TILE_WIDTH + 0] = Depth.x;
GroupSharedArray[DestI + 1 * LDS_DEPTH_TILE_WIDTH + 1] = Depth.y;
GroupSharedArray[DestI + 0 * LDS_DEPTH_TILE_WIDTH + 1] = Depth.z;
GroupSharedArray[DestI + 0 * LDS_DEPTH_TILE_WIDTH + 0] = Depth.w;
}
LinearGroupThreadId += THREADGROUP_TOTAL;
}
}
#else
#error Wrong AA_PRECACHE_SCENE_DEPTH
#endif
}
// Read the scene depth cached in LDS at <PixelOffset> input pixels from this thread's nearest input pixel.
float SampleCachedSceneDepthTexture(in FTAAInputParameters InputParams, int2 PixelOffset)
{
	const uint DepthIndex = GetTileArrayIndexFromPixelOffset(InputParams, PixelOffset, LDS_DEPTH_TILE_BORDER_SIZE);
	return GroupSharedArray[DepthIndex];
}
#endif // define(AA_PRECACHE_SCENE_DEPTH)
//-------------------------------- Share color texture fetches.
#if defined(AA_PRECACHE_SCENE_COLOR)
// Map a color entry (ArrayIndex) and one of its channels (ComponentId) to a scalar slot of GroupSharedArray.
// The mapping depends on LDS_COLOR_LAYOUT.
uint GetSceneColorLDSIndex(uint ArrayIndex, uint ComponentId)
{
	uint LdsIndex;
#if LDS_COLOR_LAYOUT == 0
	// Array of structures: the channels of one entry are contiguous.
	LdsIndex = ComponentId + ArrayIndex * LDS_COLOR_COMPONENT_COUNT;
#elif LDS_COLOR_LAYOUT == 1
	// Structure of arrays: each channel has its own run of LDS_COLOR_ARRAY_SIZE entries.
	LdsIndex = ComponentId * LDS_COLOR_ARRAY_SIZE + ArrayIndex;
#else
	#error Unknown color layout.
#endif
	return LdsIndex;
}
// Precache input scene color into LDS.
//
// The threads of the group cooperatively fetch a tile of InputSceneColor, run it through
// TransformCurrentFrameSceneColor() and store the result in LDS (GroupSharedArrayF4 when
// LDS_USE_FLOAT4_ARRAY is set, otherwise per-component in GroupSharedArray through GetSceneColorLDSIndex()).
// When AA_PRECACHE_SCENE_HDR_WEIGHT is set, the alpha channel is replaced by the sample's HDR weight.
// This function does not issue any barrier itself; the array must be synchronized before being read back.
void PrecacheInputSceneColorToLDS(in FTAAInputParameters InputParams)
{
// Number of iterations required for THREADGROUP_TOTAL threads to cover LDS_COLOR_ARRAY_SIZE entries (rounded up).
const uint LoadCount = (LDS_COLOR_ARRAY_SIZE + THREADGROUP_TOTAL - 1) / THREADGROUP_TOTAL;
// When upsampling, the tile origin and entry indices are tracked as floats instead of integers.
#define LDS_FLOAT_UV AA_UPSAMPLE
#if LDS_FLOAT_UV
float LinearGroupThreadId = float(GetGroupIndex(InputParams));
float2 Thread0PPCk = GetGroupThread0InputPixelCoord(InputParams);
// First texel of the tile, border included.
float2 GroupTexelOffset = Thread0PPCk - LDS_COLOR_TILE_BORDER_SIZE;
#else
uint LinearGroupThreadId = GetGroupIndex(InputParams);
// First texel of the tile, border included.
uint2 GroupTexelOffset = GetGroupTileTexelOffset(InputParams, LDS_COLOR_TILE_BORDER_SIZE);
#endif
UNROLL
for (uint i = 0; i < LoadCount; i++)
{
#if LDS_FLOAT_UV
// Float equivalent of the integer divide / modulo by LDS_COLOR_TILE_WIDTH done in the other branch.
float Y = floor(LinearGroupThreadId * (1.0 / LDS_COLOR_TILE_WIDTH));
float X = LinearGroupThreadId - LDS_COLOR_TILE_WIDTH * Y;
float2 TexelLocation = GroupTexelOffset + float2(X, Y);
#else
uint2 TexelLocation = GroupTexelOffset + uint2(
LinearGroupThreadId % LDS_COLOR_TILE_WIDTH,
LinearGroupThreadId / LDS_COLOR_TILE_WIDTH);
#endif
// Only the last iteration can run past the end of the array, and only when the array size is not a
// multiple of the group size. The last two terms do not depend on the thread, presumably so that the
// bound check can be resolved at compile time for the other unrolled iterations.
if ((LinearGroupThreadId < LDS_COLOR_ARRAY_SIZE) ||
(i != LoadCount - 1) ||
(LDS_COLOR_ARRAY_SIZE % THREADGROUP_TOTAL) == 0)
{
#if LDS_FLOAT_UV
// Implicit float2 -> int2 conversion, then clamp within [InputMinPixelCoord, InputMaxPixelCoord].
int2 Coord = TexelLocation;
Coord = clamp(Coord, InputMinPixelCoord, InputMaxPixelCoord);
float4 RawColor = InputSceneColor[Coord];
#else
// Clamp within [InputMinPixelCoord, InputMaxPixelCoord] before loading.
int2 Coord = int2(TexelLocation);
Coord = clamp(Coord, InputMinPixelCoord, InputMaxPixelCoord);
float4 RawColor = InputSceneColor.Load(uint3(Coord, 0));
#endif
float4 Color = TransformCurrentFrameSceneColor(RawColor);
// Precache scene color's HDR weight into alpha channel to reduce rcp() instructions in inner loops.
#if AA_PRECACHE_SCENE_HDR_WEIGHT
Color.a = GetSceneColorHdrWeight(InputParams, Color);
#endif
#if LDS_USE_FLOAT4_ARRAY
GroupSharedArrayF4[uint(LinearGroupThreadId)] = Color;
#else
// Scalar LDS array: write each component to the slot given by the configured layout.
GroupSharedArray[GetSceneColorLDSIndex(uint(LinearGroupThreadId), 0)] = Color.r;
GroupSharedArray[GetSceneColorLDSIndex(uint(LinearGroupThreadId), 1)] = Color.g;
GroupSharedArray[GetSceneColorLDSIndex(uint(LinearGroupThreadId), 2)] = Color.b;
#if LDS_COLOR_COMPONENT_COUNT == 4
GroupSharedArray[GetSceneColorLDSIndex(uint(LinearGroupThreadId), 3)] = Color.a;
#endif
#endif
}
LinearGroupThreadId += THREADGROUP_TOTAL;
}
}
// Read back from LDS the scene color sample located <PixelOffset> input pixels away from this thread's
// nearest input pixel. The stored color has already been transformed when it was written to LDS,
// so no color space transformation happens here.
FTAASceneColorSample SampleCachedSceneColorTexture(
	in FTAAInputParameters InputParams,
	int2 PixelOffset)
{
	#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC || AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
		#error Unsupported history payload with LDS scene color caching.Test
	#endif

	const uint TileIndex = GetTileArrayIndexFromPixelOffset(InputParams, PixelOffset, LDS_COLOR_TILE_BORDER_SIZE);

	FTAASceneColorSample Cached;

	// The circle of confusion radius is not stored in LDS (CoC payloads are rejected above).
	Cached.CocRadius = 0;

	#if LDS_USE_FLOAT4_ARRAY
		Cached.Color = GroupSharedArrayF4[TileIndex];
		#if AA_PRECACHE_SCENE_HDR_WEIGHT
			// The alpha channel carries the precached HDR weight rather than a color component.
			Cached.HdrWeight = Cached.Color.a;
			Cached.Color.a = 0;
		#elif HISTORY_PAYLOAD_COMPONENTS != 4
			#error LDS_USE_FLOAT4_ARRAY assumes 4 components.
		#endif
	#else
		Cached.Color.r = GroupSharedArray[GetSceneColorLDSIndex(TileIndex, 0)];
		Cached.Color.g = GroupSharedArray[GetSceneColorLDSIndex(TileIndex, 1)];
		Cached.Color.b = GroupSharedArray[GetSceneColorLDSIndex(TileIndex, 2)];
		Cached.Color.a = 0;
		// The fourth slot holds either the payload's alpha or the precached HDR weight.
		#if HISTORY_PAYLOAD_COMPONENTS == 4
			Cached.Color.a = GroupSharedArray[GetSceneColorLDSIndex(TileIndex, 3)];
		#elif AA_PRECACHE_SCENE_HDR_WEIGHT
			Cached.HdrWeight = GroupSharedArray[GetSceneColorLDSIndex(TileIndex, 3)];
		#endif
	#endif

	// The HDR weight was not precached in LDS: derive it from the color now.
	#if !AA_PRECACHE_SCENE_HDR_WEIGHT
		Cached.HdrWeight = GetSceneColorHdrWeight(InputParams, Cached.Color);
	#endif

	return Cached;
}
#endif // defined(AA_PRECACHE_SCENE_COLOR)
// Precache scene depth for the whole thread group when AA_PRECACHE_SCENE_DEPTH is defined; no-op otherwise.
// The group barrier guarantees every thread's LDS writes are visible before any thread reads the cache,
// so this must be called from uniform control flow by all threads of the group.
void PrecacheInputSceneDepth(in FTAAInputParameters InputParams)
{
#if defined(AA_PRECACHE_SCENE_DEPTH)
PrecacheInputSceneDepthToLDS(InputParams);
// Wait for all threads of the group to finish writing the depth tile.
GroupMemoryBarrierWithGroupSync();
#endif
}
// Precache scene color for the whole thread group when AA_PRECACHE_SCENE_COLOR is defined; no-op otherwise.
// Must be called from uniform control flow by all threads of the group because of the group barriers.
void PrecacheInputSceneColor(in FTAAInputParameters InputParams)
{
#if defined(AA_PRECACHE_SCENE_DEPTH) && defined(AA_PRECACHE_SCENE_COLOR)
// Both caches are enabled: synchronize before writing color.
// NOTE(review): presumably this ensures every thread is done reading the cached depth before the color
// writes reuse the same group shared memory - confirm against the LDS array declarations.
GroupMemoryBarrierWithGroupSync();
#endif
#if defined(AA_PRECACHE_SCENE_COLOR)
PrecacheInputSceneColorToLDS(InputParams);
// Wait for all threads of the group to finish writing the color tile.
GroupMemoryBarrierWithGroupSync();
#endif
}
#endif // AA_SAMPLE_CACHE_METHOD == AA_SAMPLE_CACHE_METHOD_LDS
//------------------------------------------------------- FALLBACK TO NO CACHING IMPLEMENTATIONS.
#if !defined(AA_PRECACHE_SCENE_DEPTH)
#if !defined(AA_PRECACHE_IMPLEMENTATIONS)
// Fallback used when scene depth precaching is not available: silently does nothing, so callers can
// invoke PrecacheInputSceneDepth() unconditionally.
void PrecacheInputSceneDepth(in FTAAInputParameters InputParams)
{ }
#endif
// Sample scene depth directly from SceneDepthTexture (no LDS cache), <PixelOffset> texels away from
// the nearest input pixel of this thread.
float SampleCachedSceneDepthTexture(in FTAAInputParameters InputParams, int2 PixelOffset)
{
	const float2 DepthUV = InputParams.NearestBufferUV;
	return SceneDepthTexture.SampleLevel(SceneDepthTextureSampler, DepthUV, 0, PixelOffset).r;
}
#endif
#if !defined(AA_PRECACHE_SCENE_COLOR)
#if !defined(AA_PRECACHE_IMPLEMENTATIONS)
// Fallback used when scene color precaching is not available: silently does nothing, so callers can
// invoke PrecacheInputSceneColor() unconditionally.
void PrecacheInputSceneColor(in FTAAInputParameters InputParams)
{ }
#endif
// Fetch the current frame scene color directly from InputSceneColor (no LDS cache), <PixelOffset> texels
// away from the nearest input pixel, and transform it with TransformCurrentFrameSceneColor().
// Also fills the sample's CoC radius (according to AA_HISTORY_PAYLOAD) and its HDR weight.
FTAASceneColorSample SampleCachedSceneColorTexture(
	in FTAAInputParameters InputParams,
	int2 PixelOffset)
{
	// Texel to read, kept within [InputMinPixelCoord, InputMaxPixelCoord].
	int2 TexelCoord = clamp(
		int2(InputParams.NearestBufferUV * InputSceneColorSize.xy) + PixelOffset,
		InputMinPixelCoord,
		InputMaxPixelCoord);

	FTAASceneColorSample Result;
	Result.Color = TransformCurrentFrameSceneColor(InputSceneColor[TexelCoord]);

	#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC
		// CoC radius travels in the color's alpha channel.
		Result.CocRadius = Result.Color.a;
	#elif AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
		// CoC radius travels in the red channel of the separate metadata texture.
		Result.CocRadius = InputSceneMetadata[TexelCoord].r;
	#else
		Result.CocRadius = 0;
	#endif

	Result.HdrWeight = GetSceneColorHdrWeight(InputParams, Result.Color);
	return Result;
}
#endif
//------------------------------------------------------- TEMPORAL UPSAMPLING
#if AA_UPSAMPLE
// Spatial weight of an input sample located <PixelDelta> pixels away from the peak of the filter kernel.
float ComputeSampleWeigth(in FTAAIntermediaryResult IntermediaryResult, float2 PixelDelta)
{
	// Squared scale applied to the squared distance before evaluating the kernel.
	float ScaleSquared = UpscaleFactor * UpscaleFactor;

	// InvFilterScaleFactor exists to blur the current frame's scene color when upscaling,
	// so it is left out when downscaling.
	if (!AA_DOWNSAMPLE)
	{
		const float InvScale = IntermediaryResult.InvFilterScaleFactor;
		ScaleSquared *= (InvScale * InvScale);
	}

	const float DistanceSquared = dot(PixelDelta, PixelDelta);

	#if 1
		// Polynomial kernel evaluated on x2 = saturate(scaled squared distance):
		// (0.905 * x2 - 1.9) * x2 + 1
		float x2 = saturate(ScaleSquared * DistanceSquared);
		return (0.905 * x2 - 1.9) * x2 + 1;
	#else
		// Original Gaussian kernel: e ^ (- x^2 / (2 * s^2))
		const float Sigma = 0.47;
		const float ExponentInputFactor = (-0.5 / (Sigma * Sigma));
		float x2 = DistanceSquared * ScaleSquared;
		return exp(ExponentInputFactor * x2);
	#endif
}
// Temporal weight of an output pixel located <PixelDelta> pixels away from the peak of the filter kernel.
// Same kernel as ComputeSampleWeigth(), additionally multiplied by the squared scale when not downsampling.
float ComputePixelWeigth(in FTAAIntermediaryResult IntermediaryResult, float2 PixelDelta)
{
	// Squared scale applied to the squared distance before evaluating the kernel.
	float ScaleSquared = UpscaleFactor * UpscaleFactor;

	// InvFilterScaleFactor exists to blur the current frame's scene color when upscaling,
	// so it is left out when downscaling.
	if (!AA_DOWNSAMPLE)
	{
		const float InvScale = IntermediaryResult.InvFilterScaleFactor;
		ScaleSquared *= (InvScale * InvScale);
	}

	const float DistanceSquared = dot(PixelDelta, PixelDelta);

	#if 1
		// Polynomial kernel evaluated on x2 = saturate(scaled squared distance):
		// (0.905 * x2 - 1.9) * x2 + 1
		float x2 = saturate(ScaleSquared * DistanceSquared);
		float KernelValue = (0.905 * x2 - 1.9) * x2 + 1;
	#else
		// Original Gaussian kernel: e ^ (- x^2 / (2 * s^2))
		// NOTE(review): unlike ComputeSampleWeigth(), this disabled path does not scale the squared distance - confirm before re-enabling.
		const float Sigma = 0.47;
		const float ExponentInputFactor = (-0.5 / (Sigma * Sigma));
		float x2 = DistanceSquared;
		float KernelValue = exp(ExponentInputFactor * x2);
	#endif

	if (AA_DOWNSAMPLE)
	{
		return KernelValue;
	}

	// Upsampling only: scale the weight by the squared factor, because an output pixel only has a
	// probability = screen percentage ^ 2 of getting a weight of 1. Not done when downsampling so that
	// convergence is not slower than at screen percentage 100%.
	return ScaleSquared * KernelValue;
}
#endif // AA_UPSAMPLE
//------------------------------------------------------- TAA MAJOR FUNCTIONS
// Filter input pixels.
//
// Computes the spatially filtered payload of the current frame and stores it in IntermediaryResult.Filtered.
// When AA_UPSAMPLE is set, IntermediaryResult.FilteredTemporalWeight is also written (filtered path only).
// Reads IntermediaryResult.InvFilterScaleFactor through ComputeSampleWeigth() / ComputePixelWeigth().
void FilterCurrentFrameInputSamples(
in FTAAInputParameters InputParams,
inout FTAAIntermediaryResult IntermediaryResult)
{
#if !AA_FILTERED
{
// No filtering: pass the center sample straight through. FilteredTemporalWeight is left untouched on this path.
IntermediaryResult.Filtered.Color = SampleCachedSceneColorTexture(InputParams, int2(0, 0)).Color;
IntermediaryResult.Filtered.CocRadius = SampleCachedSceneColorTexture(InputParams, int2(0, 0)).CocRadius;
return;
}
#endif
FTAAHistoryPayload Filtered;
{
#if AA_UPSAMPLE
// Pixel coordinate of the center of output pixel O in the input viewport.
float2 PPCo = InputParams.ViewportUV * InputViewSize.xy + TemporalJitterPixels;
// Pixel coordinate of the center of the nearest input pixel K.
float2 PPCk = floor(PPCo) + 0.5;
// Vector in pixel between pixel K -> O.
float2 dKO = PPCo - PPCk;
#endif
// Indices into kOffsets3x3 of the samples to accumulate. With AA_SAMPLES == 6 the sixth sample is
// handled separately in the loop below.
#if AA_SAMPLES == 9
const uint SampleIndexes[9] = kSquareIndexes3x3;
#elif AA_SAMPLES == 5 || AA_SAMPLES == 6
const uint SampleIndexes[5] = kPlusIndexes3x3;
#endif
#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC || AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
// Fetches center pixel's Coc for the bilateral filtering.
float CenterCocRadius = SampleCachedSceneColorTexture(InputParams, int2(0, 0)).CocRadius;
#endif
// Accumulators: sum of spatial * HDR weights, sum of final weights, and weighted color sum.
float NeighborsHdrWeight = 0;
float NeighborsFinalWeight = 0;
float4 NeighborsColor = 0;
UNROLL
for (uint i = 0; i < AA_SAMPLES; i++)
{
// Get the sample offset from the nearest input pixel.
int2 SampleOffset;
#if AA_UPSAMPLE && AA_SAMPLES == 6
if (i == 5)
{
// Sixth sample: the neighbor picked from the sign of dKO.
SampleOffset = SignFastInt(dKO);
}
else
#endif
{
const uint SampleIndex = SampleIndexes[i];
SampleOffset = kOffsets3x3[SampleIndex];
}
float2 fSampleOffset = float2(SampleOffset);
// When doing Coc bilateral, the center sample is accumulated last.
// NOTE: this skip is currently disabled by the "&& 0", so the center sample is accumulated in this loop.
#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC && 0
if (all(SampleOffset == 0) && (AA_SAMPLES != 6 || i != 5))
{
continue;
}
#endif
// Finds out the spatial weight of this input sample.
#if AA_UPSAMPLE
// Compute the pixel delta between output pixels and input pixel I.
// Note: abs() is unnecessary because of the dot(dPP, dPP) later on.
float2 dPP = fSampleOffset - dKO;
float SampleSpatialWeight = ComputeSampleWeigth(IntermediaryResult, dPP);
#elif AA_SAMPLES == 9
float SampleSpatialWeight = GET_SCALAR_ARRAY_ELEMENT(SampleWeights, i);
#elif AA_SAMPLES == 5
float SampleSpatialWeight = GET_SCALAR_ARRAY_ELEMENT(PlusWeights, i);
#else
#error Do not know how to compute filtering sample weight.
#endif
// Fetch sample.
FTAASceneColorSample Sample = SampleCachedSceneColorTexture(InputParams, SampleOffset);
// Finds out the sample's HDR weight.
#if AA_TONE
float SampleHdrWeight = Sample.HdrWeight;
#else
float SampleHdrWeight = 1;
#endif
// Finds out the sample's bilateral weight according to the payload.
#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC
float BilateralWeight = ComputeNeightborSampleBilateralWeight(CenterCocRadius, Sample.CocRadius);
#else
float BilateralWeight = 1;
#endif
float SampleFinalWeight = SampleSpatialWeight * SampleHdrWeight * BilateralWeight;
// Apply pixel.
NeighborsColor += SampleFinalWeight * Sample.Color;
NeighborsFinalWeight += SampleFinalWeight;
NeighborsHdrWeight += SampleSpatialWeight * SampleHdrWeight;
}
#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC || AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
{
// NOTE(review): Sample and SampleWeight below are only consumed by the commented-out formula;
// the active code normalizes the accumulated color exactly like the AA_TONE || AA_UPSAMPLE path.
FTAASceneColorSample Sample = SampleCachedSceneColorTexture(InputParams, 0);
#if AA_UPSAMPLE
float SampleWeight = ComputeSampleWeigth(IntermediaryResult, -dKO);
#elif AA_SAMPLES == 9
float SampleWeight = GET_SCALAR_ARRAY_ELEMENT(SampleWeights, 4);
#else
float SampleWeight = GET_SCALAR_ARRAY_ELEMENT(PlusWeights, 2);
#endif
if (AA_TONE)
{
SampleWeight *= Sample.HdrWeight;
}
// TODO: it feels wrong...
//Filtered = NeighborsColor * (NeighborsHdrWeight * rcp(NeighborsFinalWeight)) + Sample.Color * SampleWeight;
Filtered.Color = NeighborsColor * rcp(NeighborsFinalWeight);
Filtered.CocRadius = CenterCocRadius;
}
#elif AA_TONE || AA_UPSAMPLE
{
// Renormalize because the SampleFinalWeight values do not sum to 1.
Filtered.Color = NeighborsColor * rcp(NeighborsFinalWeight);
Filtered.CocRadius = 0;
}
#else
{
// No renormalization on this path: the accumulated color is used as is.
Filtered.Color = NeighborsColor;
Filtered.CocRadius = 0;
}
#endif
#if AA_UPSAMPLE
// Compute the temporal weight of the output pixel.
IntermediaryResult.FilteredTemporalWeight = ComputePixelWeigth(IntermediaryResult, dKO);
#endif
}
IntermediaryResult.Filtered = Filtered;
}
// Compute the neighborhood bounding box used to reject history.
//
// Fetches kNeighborsCount samples at the kOffsets3x3 offsets around the nearest input pixel, then derives
// a per-component min / max payload according to AA_HISTORY_CLAMPING_BOX:
//   HISTORY_CLAMPING_BOX_VARIANCE:        mean +/- 1.25 standard deviation, extended to contain the filtered color.
//   HISTORY_CLAMPING_BOX_SAMPLE_DISTANCE: min / max of the samples close enough to the output pixel.
//   HISTORY_CLAMPING_BOX_MIN_MAX:         min / max of the plus pattern, optionally extended (AA_SAMPLES 6 or 9).
// Outputs the result in OutNeighborMin / OutNeighborMax.
void ComputeNeighborhoodBoundingbox(
in FTAAInputParameters InputParams,
in FTAAIntermediaryResult IntermediaryResult,
out FTAAHistoryPayload OutNeighborMin,
out FTAAHistoryPayload OutNeighborMax)
{
// TODO: clean this up.
FTAAHistoryPayload Neighbors[kNeighborsCount];
UNROLL
for (uint i = 0; i < kNeighborsCount; i++)
{
Neighbors[i].Color = SampleCachedSceneColorTexture(InputParams, kOffsets3x3[i]).Color;
Neighbors[i].CocRadius = SampleCachedSceneColorTexture(InputParams, kOffsets3x3[i]).CocRadius;
}
FTAAHistoryPayload NeighborMin;
FTAAHistoryPayload NeighborMax;
#if AA_HISTORY_CLAMPING_BOX == HISTORY_CLAMPING_BOX_VARIANCE
{
// NOTE(review): this branch reads and writes FTAAHistoryPayload values as if they were float4
// (Neighbors[...], NeighborMin / NeighborMax, IntermediaryResult.Filtered), while the rest of the
// function accesses them through .Color / .CocRadius. It looks stale - confirm it still compiles
// before enabling HISTORY_CLAMPING_BOX_VARIANCE.
#if AA_SAMPLES == 9
const uint SampleIndexes[9] = kSquareIndexes3x3;
#elif AA_SAMPLES == 5
const uint SampleIndexes[5] = kPlusIndexes3x3;
#else
#error Unknown number of samples.
#endif
// First and second moments of the neighborhood.
float4 m1 = 0;
float4 m2 = 0;
for( uint i = 0; i < AA_SAMPLES; i++ )
{
float4 SampleColor = Neighbors[ SampleIndexes[i] ];
m1 += SampleColor;
m2 += Pow2( SampleColor );
}
m1 *= (1.0 / AA_SAMPLES);
m2 *= (1.0 / AA_SAMPLES);
// abs() guards the sqrt against a slightly negative variance.
float4 StdDev = sqrt( abs(m2 - m1 * m1) );
NeighborMin = m1 - 1.25 * StdDev;
NeighborMax = m1 + 1.25 * StdDev;
// Make sure the box contains the filtered color.
NeighborMin = min( NeighborMin, IntermediaryResult.Filtered );
NeighborMax = max( NeighborMax, IntermediaryResult.Filtered );
}
#elif AA_HISTORY_CLAMPING_BOX == HISTORY_CLAMPING_BOX_SAMPLE_DISTANCE
// Do color clamping only within a radius.
{
// Same output pixel / nearest input pixel computation as in FilterCurrentFrameInputSamples().
float2 PPCo = InputParams.ViewportUV * InputViewSize.xy + TemporalJitterPixels;
float2 PPCk = floor(PPCo) + 0.5;
float2 dKO = PPCo - PPCk;
// Sample 4 is always going to be considered anyway.
NeighborMin = Neighbors[4];
NeighborMax = Neighbors[4];
// Reduce distance threshold as upscale factor increases to reduce ghosting.
float DistthresholdLerp = UpscaleFactor - 1;
float DistThreshold = lerp(1.51, 1.3, DistthresholdLerp);
#if AA_SAMPLES == 9
const uint Indexes[9] = kSquareIndexes3x3;
#else
const uint Indexes[5] = kPlusIndexes3x3;
#endif
UNROLL
for( uint i = 0; i < AA_SAMPLES; i++ )
{
uint NeightborId = Indexes[i];
if (NeightborId != 4)
{
// Distance between the neighbor and the output pixel, in input pixels.
float2 dPP = float2(kOffsets3x3[NeightborId]) - dKO;
FLATTEN
if (dot(dPP, dPP) < (DistThreshold * DistThreshold))
{
NeighborMin = MinPayload(NeighborMin, Neighbors[NeightborId]);
NeighborMax = MaxPayload(NeighborMax, Neighbors[NeightborId]);
}
}
}
}
#elif AA_HISTORY_CLAMPING_BOX == HISTORY_CLAMPING_BOX_MIN_MAX
{
// Min / max over neighbors 1, 3, 4, 5 and 7.
NeighborMin = MinPayload3( Neighbors[1], Neighbors[3], Neighbors[4] );
NeighborMin = MinPayload3( NeighborMin, Neighbors[5], Neighbors[7] );
NeighborMax = MaxPayload3( Neighbors[1], Neighbors[3], Neighbors[4] );
NeighborMax = MaxPayload3( NeighborMax, Neighbors[5], Neighbors[7] );
#if AA_SAMPLES == 6
{
// Extend the box with the extra neighbor picked from the sign of dKO, matching the sixth
// sample used by FilterCurrentFrameInputSamples().
float2 PPCo = InputParams.ViewportUV * InputViewSize.xy + TemporalJitterPixels;
float2 PPCk = floor(PPCo) + 0.5;
float2 dKO = PPCo - PPCk;
int2 FifthNeighborOffset = SignFastInt(dKO);
FTAAHistoryPayload FifthNeighbor;
FifthNeighbor.Color = SampleCachedSceneColorTexture(InputParams, FifthNeighborOffset).Color;
FifthNeighbor.CocRadius = SampleCachedSceneColorTexture(InputParams, FifthNeighborOffset).CocRadius;
NeighborMin = MinPayload(NeighborMin, FifthNeighbor);
NeighborMax = MaxPayload(NeighborMax, FifthNeighbor);
}
#elif AA_SAMPLES == 9
{
// Keep the box of the first five neighbors, then extend with neighbors 0, 2, 6 and 8.
FTAAHistoryPayload NeighborMinPlus = NeighborMin;
FTAAHistoryPayload NeighborMaxPlus = NeighborMax;
NeighborMin = MinPayload3( NeighborMin, Neighbors[0], Neighbors[2] );
NeighborMin = MinPayload3( NeighborMin, Neighbors[6], Neighbors[8] );
NeighborMax = MaxPayload3( NeighborMax, Neighbors[0], Neighbors[2] );
NeighborMax = MaxPayload3( NeighborMax, Neighbors[6], Neighbors[8] );
if( AA_ROUND )
{
// Average the five-neighbor box and the nine-neighbor box.
NeighborMin = AddPayload(MulPayload(NeighborMin, 0.5), MulPayload(NeighborMinPlus, 0.5));
NeighborMax = AddPayload(MulPayload(NeighborMax, 0.5), MulPayload(NeighborMaxPlus, 0.5));
}
}
#endif
}
#else
#error Unknown history clamping box.
#endif
OutNeighborMin = NeighborMin;
OutNeighborMax = NeighborMax;
}
// Sample history.
//
// Samples HistoryBuffer_0 (and HistoryBuffer_1 when HISTORY_RENDER_TARGETS == 2) at the given screen
// position, applies the pre-exposure correction and TransformSceneColor(), and returns the result as a payload.
//   HistoryScreenPosition: screen position to sample the history at; only the local copy is modified.
FTAAHistoryPayload SampleHistory(in float2 HistoryScreenPosition)
{
float4 RawHistory0 = 0;
float4 RawHistory1 = 0;
// Sample the history using Catmull-Rom to reduce blur on motion.
#if AA_BICUBIC
{
float2 HistoryBufferUV = HistoryScreenPosition * ScreenPosToHistoryBufferUV.xy + ScreenPosToHistoryBufferUV.zw;
// Clamp HistoryBufferUV up front so that each sample does not need a full clamp.
#if AA_MANUALLY_CLAMP_HISTORY_UV
HistoryBufferUV = clamp(HistoryBufferUV, HistoryBufferUVMinMax.xy, HistoryBufferUVMinMax.zw);
#endif
FCatmullRomSamples Samples = GetBicubic2DCatmullRomSamples(HistoryBufferUV, HistoryBufferSize.xy, HistoryBufferSize.zw);
UNROLL
for (uint i = 0; i < Samples.Count; i++)
{
float2 SampleUV = Samples.UV[i];
// Clamp SampleUV within HistoryBufferUVMinMax to avoid sampling potential NaN outside view rect.
// This may look expensive, but Samples.UVDir is actually compile time constant to give a hint on what and how each component can be optimally clamped.
if (AA_MANUALLY_CLAMP_HISTORY_UV)
{
// Only clamp against the border the sample is offset towards, per axis.
if (Samples.UVDir[i].x < 0)
{
SampleUV.x = max(SampleUV.x, HistoryBufferUVMinMax.x);
}
else if (Samples.UVDir[i].x > 0)
{
SampleUV.x = min(SampleUV.x, HistoryBufferUVMinMax.z);
}
if (Samples.UVDir[i].y < 0)
{
SampleUV.y = max(SampleUV.y, HistoryBufferUVMinMax.y);
}
else if (Samples.UVDir[i].y > 0)
{
SampleUV.y = min(SampleUV.y, HistoryBufferUVMinMax.w);
}
}
RawHistory0 += HistoryBuffer_0.SampleLevel(HistoryBufferSampler_0, SampleUV, 0) * Samples.Weight[i];
}
RawHistory0 *= Samples.FinalMultiplier;
}
// Sample the history using bilinear sampler.
#else
{
// Clamp HistoryScreenPosition to be within viewport.
if (AA_MANUALLY_CLAMP_HISTORY_UV)
{
HistoryScreenPosition = clamp(HistoryScreenPosition, -ScreenPosAbsMax, ScreenPosAbsMax);
}
float2 HistoryBufferUV = HistoryScreenPosition * ScreenPosToHistoryBufferUV.xy + ScreenPosToHistoryBufferUV.zw;
RawHistory0 = HistoryBuffer_0.SampleLevel(HistoryBufferSampler_0, HistoryBufferUV, 0);
}
#endif
#if HISTORY_RENDER_TARGETS == 2
{
// The second history render target is always fetched with a single sample, even when AA_BICUBIC is set.
if (AA_MANUALLY_CLAMP_HISTORY_UV)
{
HistoryScreenPosition = clamp(HistoryScreenPosition, -ScreenPosAbsMax, ScreenPosAbsMax);
}
float2 HistoryBufferUV = HistoryScreenPosition * ScreenPosToHistoryBufferUV.xy + ScreenPosToHistoryBufferUV.zw;
RawHistory1 = HistoryBuffer_1.SampleLevel(HistoryBufferSampler_1, HistoryBufferUV, 0);
}
#endif
FTAAHistoryPayload HistoryPayload;
HistoryPayload.Color = RawHistory0;
// CoC radius lives in the second render target's red channel for RGB_OPACITY_COC, otherwise in the
// first render target's alpha.
#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
HistoryPayload.CocRadius = RawHistory1.r;
#else
HistoryPayload.CocRadius = RawHistory0.a;
#endif
// Bring the history's RGB to the current frame's pre-exposure.
#if COMPILER_SUPPORTS_HLSL2021
CorrectExposure(HistoryPayload.Color.rgb);
#else
HistoryPayload.Color.rgb *= HistoryPreExposureCorrection;
#endif
HistoryPayload.Color = TransformSceneColor(HistoryPayload.Color);
return HistoryPayload;
}
// Clamp history.
//
// Constrains the history payload to the neighborhood bounding box of the current frame.
//   IntermediaryResult: intermediary results; the filtered color is the clip target when AA_CLIP is set.
//   History:            history payload to constrain.
//   NeighborMin / Max:  bounding box computed by ComputeNeighborhoodBoundingbox().
// Returns the constrained history payload:
//   !AA_CLAMP: History unchanged.
//   AA_CLIP:   History color clipped towards the filtered color (alpha optionally clamped).
//   otherwise: History color and CoC radius clamped component-wise to [NeighborMin, NeighborMax].
FTAAHistoryPayload ClampHistory(inout FTAAIntermediaryResult IntermediaryResult, FTAAHistoryPayload History, FTAAHistoryPayload NeighborMin, FTAAHistoryPayload NeighborMax)
{
	#if !AA_CLAMP
		return History;
	#elif AA_CLIP
		// Clip history, this uses color AABB intersection for tighter fit.
		// Alternative target: 0.5 * (NeighborMin.Color + NeighborMax.Color).
		float4 TargetColor = IntermediaryResult.Filtered.Color;

		// Blend factor towards the target that brings the history color back inside the color box.
		float ClipBlend = HistoryClip(History.Color.rgb, TargetColor.rgb, NeighborMin.Color.rgb, NeighborMax.Color.rgb);
		ClipBlend = saturate(ClipBlend);

		History.Color = lerp(History.Color, TargetColor, ClipBlend);

		#if AA_FORCE_ALPHA_CLAMP
			History.Color.a = clamp(History.Color.a, NeighborMin.Color.a, NeighborMax.Color.a);
		#endif

		// NOTE(review): the CoC radius is left untouched on this path, as the original clip code only
		// ever operated on the color - confirm whether it should be clamped like in the branch below.
		return History;
	#else //!AA_CLIP
		History.Color = clamp(History.Color, NeighborMin.Color, NeighborMax.Color);
		History.CocRadius = clamp(History.CocRadius, NeighborMin.CocRadius, NeighborMax.CocRadius);
		return History;
	#endif
}
//------------------------------------------------------- TAA MAIN FUNCTION
FTAAHistoryPayload TemporalAASample(uint2 GroupId, uint2 GroupThreadId, uint GroupThreadIndex, float2 ViewportUV, float FrameExposureScale)
{
// SETUP
// -----
FTAAInputParameters InputParams;
// Per frame setup.
InputParams.FrameExposureScale = ToScalarMemory(FrameExposureScale * View.OneOverPreExposure);
// Per pixel setup.
{
InputParams.GroupId = GroupId;
InputParams.GroupThreadId = GroupThreadId;
InputParams.GroupThreadIndex = GroupThreadIndex;
InputParams.ViewportUV = ViewportUV;
InputParams.ScreenPos = ViewportUVToScreenPos(ViewportUV);
InputParams.NearestBufferUV = ViewportUV * ViewportUVToInputBufferUV.xy + ViewportUVToInputBufferUV.zw;
// Handle single or multi-pass responsive AA
#if AA_SINGLE_PASS_RESPONSIVE
{
const uint kResponsiveStencilMask = 1 << 3;
int2 SceneStencilUV = (int2)trunc(InputParams.NearestBufferUV * InputSceneColorSize.xy);
uint SceneStencilRef = StencilTexture.Load(int3(SceneStencilUV, 0)) STENCIL_COMPONENT_SWIZZLE;
InputParams.bIsResponsiveAAPixel = (SceneStencilRef & kResponsiveStencilMask) ? 1.f : 0.f;
}
#elif TAA_RESPONSIVE
InputParams.bIsResponsiveAAPixel = 1.f;
#else
InputParams.bIsResponsiveAAPixel = 0.f;
#endif
#if AA_UPSAMPLE
{
// Pixel coordinate of the center of output pixel O in the input viewport.
float2 PPCo = ViewportUV * InputViewSize.xy + TemporalJitterPixels;
// Pixel coordinate of the center of the nearest input pixel K.
float2 PPCk = floor(PPCo) + 0.5;
// Pixel coordinate of the center of the nearest top left input pixel T.
float2 PPCt = floor(PPCo - 0.5) + 0.5;
InputParams.NearestBufferUV = InputSceneColorSize.zw * (InputViewMin + PPCk);
InputParams.NearestTopLeftBufferUV = InputSceneColorSize.zw * (InputViewMin + PPCt);
// TODO: because use nearest sampler, can be faster in this computation.
}
#endif
}
// Setup intermediary results.
FTAAIntermediaryResult IntermediaryResult = CreateIntermediaryResult();
// FIND MOTION OF PIXEL AND NEAREST IN NEIGHBORHOOD
// ------------------------------------------------
float3 PosN; // Position of this pixel, possibly later nearest pixel in neighborhood.
PosN.xy = InputParams.ScreenPos;
PrecacheInputSceneDepth(InputParams);
PosN.z = SampleCachedSceneDepthTexture(InputParams, int2(0, 0));
// Screen position of minimum depth.
float2 VelocityOffset = float2(0.0, 0.0);
#if AA_CROSS // TODO: 2x2.
{
// For motion vector, use camera/dynamic motion from min depth pixel in pattern around pixel.
// This enables better quality outline on foreground against different motion background.
// Larger 2 pixel distance "x" works best (because AA dilates surface).
float4 Depths;
Depths.x = SampleCachedSceneDepthTexture(InputParams, int2(-AA_CROSS, -AA_CROSS));
Depths.y = SampleCachedSceneDepthTexture(InputParams, int2( AA_CROSS, -AA_CROSS));
Depths.z = SampleCachedSceneDepthTexture(InputParams, int2(-AA_CROSS, AA_CROSS));
Depths.w = SampleCachedSceneDepthTexture(InputParams, int2( AA_CROSS, AA_CROSS));
float2 DepthOffset = float2(AA_CROSS, AA_CROSS);
float DepthOffsetXx = float(AA_CROSS);
#if HAS_INVERTED_Z_BUFFER
// Nearest depth is the largest depth (depth surface 0=far, 1=near).
if(Depths.x > Depths.y)
{
DepthOffsetXx = -AA_CROSS;
}
if(Depths.z > Depths.w)
{
DepthOffset.x = -AA_CROSS;
}
float DepthsXY = max(Depths.x, Depths.y);
float DepthsZW = max(Depths.z, Depths.w);
if(DepthsXY > DepthsZW)
{
DepthOffset.y = -AA_CROSS;
DepthOffset.x = DepthOffsetXx;
}
float DepthsXYZW = max(DepthsXY, DepthsZW);
if(DepthsXYZW > PosN.z)
{
// This is offset for reading from velocity texture.
// This supports half or fractional resolution velocity textures.
// With the assumption that UV position scales between velocity and color.
VelocityOffset = DepthOffset * InputSceneColorSize.zw;
// This is [0 to 1] flipped in Y.
//PosN.xy = ScreenPos + DepthOffset * OutputViewportSize.zw * 2.0;
PosN.z = DepthsXYZW;
}
#else // !HAS_INVERTED_Z_BUFFER
#error Fix me!
#endif // !HAS_INVERTED_Z_BUFFER
}
#endif // AA_CROSS
// Camera motion for pixel or nearest pixel (in ScreenPos space).
bool OffScreen = false;
float Velocity = 0;
float HistoryBlur = 0;
float2 HistoryScreenPosition = InputParams.ScreenPos;
#if 1
{
float4 ThisClip = float4( PosN.xy, PosN.z, 1 );
float4 PrevClip = mul( ThisClip, View.ClipToPrevClip );
float2 PrevScreen = PrevClip.xy / PrevClip.w;
float2 BackN = PosN.xy - PrevScreen;
float2 BackTemp = BackN * OutputViewportSize.xy;
#if AA_DYNAMIC
{
ENCODED_VELOCITY_TYPE EncodedVelocity = SampleVelocityTexture(InputParams.NearestBufferUV + VelocityOffset);
bool DynamicN = EncodedVelocity.x > 0.0;
if(DynamicN)
{
BackN = DecodeVelocityFromTexture(EncodedVelocity).xy;
}
BackTemp = BackN * OutputViewportSize.xy;
}
#endif
Velocity = sqrt(dot(BackTemp, BackTemp));
#if !AA_BICUBIC
// Save the amount of pixel offset of just camera motion, used later as the amount of blur introduced by history.
float HistoryBlurAmp = 2.0;
HistoryBlur = saturate(abs(BackTemp.x) * HistoryBlurAmp + abs(BackTemp.y) * HistoryBlurAmp);
#endif
// Easier to do off screen check before conversion.
// BackN is in units of 2pixels/viewportWidthInPixels
// This converts back projection vector to [-1 to 1] offset in viewport.
HistoryScreenPosition = InputParams.ScreenPos - BackN;
// Detect if HistoryBufferUV would be outside of the viewport.
OffScreen = max(abs(HistoryScreenPosition.x), abs(HistoryScreenPosition.y)) >= 1.0;
}
#endif
// Precache input scene color.
PrecacheInputSceneColor(/* inout = */ InputParams);
// Filter input.
#if AA_UPSAMPLE_ADAPTIVE_FILTERING == 0
FilterCurrentFrameInputSamples(
InputParams,
/* inout = */ IntermediaryResult);
#endif
// Compute neighborhood bounding box.
FTAAHistoryPayload NeighborMin;
FTAAHistoryPayload NeighborMax;
ComputeNeighborhoodBoundingbox(
InputParams,
/* inout = */ IntermediaryResult,
NeighborMin, NeighborMax);
// Sample history.
FTAAHistoryPayload History = SampleHistory(HistoryScreenPosition);
// Whether the feedback needs to be reset.
bool IgnoreHistory = OffScreen || bCameraCut;
// DYNAMIC ANTI GHOSTING
// ---------------------
#if AA_DYNAMIC_ANTIGHOST && AA_DYNAMIC && HISTORY_PAYLOAD_COMPONENTS == 3
bool Dynamic4;
{
#if !AA_DYNAMIC
#error AA_DYNAMIC_ANTIGHOST requires AA_DYNAMIC
#endif
// TODO: try a 2x2 for AA_UPSAMPLE
bool Dynamic1 = SampleVelocityTexture(InputParams.NearestBufferUV, int2( 0, -1)).x > 0;
bool Dynamic3 = SampleVelocityTexture(InputParams.NearestBufferUV, int2(-1, 0)).x > 0;
Dynamic4 = SampleVelocityTexture(InputParams.NearestBufferUV).x > 0;
bool Dynamic5 = SampleVelocityTexture(InputParams.NearestBufferUV, int2( 1, 0)).x > 0;
bool Dynamic7 = SampleVelocityTexture(InputParams.NearestBufferUV, int2( 0, 1)).x > 0;
bool Dynamic = Dynamic1 || Dynamic3 || Dynamic4 || Dynamic5 || Dynamic7;
IgnoreHistory = IgnoreHistory || (!Dynamic && History.Color.a > 0);
}
#endif
// Save off luma of history before the clamp.
float LumaMin = GetSceneColorLuma4(NeighborMin.Color);
float LumaMax = GetSceneColorLuma4(NeighborMax.Color);
float LumaHistory = GetSceneColorLuma4(History.Color);
// Clamp history.
FTAAHistoryPayload PreClampingHistoryColor = History;
History = ClampHistory(IntermediaryResult, History, NeighborMin, NeighborMax);
// Filter input after color clamping.
#if AA_UPSAMPLE_ADAPTIVE_FILTERING == 1
{
#if AA_VARIANCE
#error AA_VARIANCE and AA_UPSAMPLE_ADAPTIVE_FILTERING are not compatible because of circular code dependency.
#endif
if (IgnoreHistory) // || any(HistoryColor != PreClampingHistoryColor))
{
// Set the input filter infinitely large when we know need to rely on it.
IntermediaryResult.InvFilterScaleFactor = 0;
}
// Blur input according to input pixel velocity to reduce blocky filtering cause by narrow filter on low screen percentage.
// Multiplied by upscale factor because Velocity is in output resolution based pixel velocity.
IntermediaryResult.InvFilterScaleFactor -= (Velocity * UpscaleFactor) * 0.1;
// Set a minimal filtering scale to screen percentage to not unecessarily blur the input more than the screen percentage.
IntermediaryResult.InvFilterScaleFactor = max(IntermediaryResult.InvFilterScaleFactor, ScreenPercentage);
FilterCurrentFrameInputSamples(
InputParams,
/* inout = */ IntermediaryResult);
}
#endif
// ADD BACK IN ALIASING TO SHARPEN
// -------------------------------
#if AA_FILTERED && !AA_BICUBIC
{
#if AA_UPSAMPLE
#error Temporal upsample does not support sharpen.
#endif
// Blend in non-filtered based on the amount of sub-pixel motion.
float AddAliasing = saturate(HistoryBlur) * 0.5;
float LumaContrastFactor = 32.0;
#if AA_YCOCG // TODO: Probably a bug arround here because using Luma4() even with YCOCG=0.
// 1/4 as bright.
LumaContrastFactor *= 4.0;
#endif
float LumaContrast = LumaMax - LumaMin;
AddAliasing = saturate(AddAliasing + rcp(1.0 + LumaContrast * LumaContrastFactor));
IntermediaryResult.Filtered.Color = lerp(IntermediaryResult.Filtered.Color, SampleCachedSceneColorTexture(InputParams, int2(0, 0)).Color, AddAliasing);
}
#endif
// COMPUTE BLEND AMOUNT
// --------------------
float BlendFinal;
{
float LumaFiltered = GetSceneColorLuma4(IntermediaryResult.Filtered.Color);
BlendFinal = IntermediaryResult.FilteredTemporalWeight * CurrentFrameWeight;
BlendFinal = lerp(BlendFinal, 0.2, saturate(Velocity / 40));
#if 0
{
// Anti-flicker
float DistToClamp = 2 * abs(min(LumaHistory - LumaMin, LumaMax - LumaHistory) / (LumaMax - LumaMin));
//BlendFinal *= lerp( 0, 1, saturate(4 * DistToClamp) );
BlendFinal += 0.8 * saturate(0.02 * LumaHistory / abs(Filtered.x - LumaHistory));
BlendFinal *= (LumaMin * InExposureScale + 0.5) / (LumaMax * InExposureScale + 0.5);
}
#endif
// Make sure to have at least some small contribution
BlendFinal = max( BlendFinal, saturate( 0.01 * LumaHistory / abs( LumaFiltered - LumaHistory ) ) );
#if AA_NAN && (COMPILER_GLSL || COMPILER_METAL)
// The current Metal & GLSL compilers don't handle saturate(NaN) -> 0, instead they return NaN/INF.
BlendFinal = -min(-BlendFinal, 0.0);
#endif
// Responsive forces 1/4 of new frame.
BlendFinal = InputParams.bIsResponsiveAAPixel ? (1.0/4.0) : BlendFinal;
#if AA_LERP
BlendFinal = 1.0/float(AA_LERP);
#endif
#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC || AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
{
float BilateralWeight = ComputeBilateralWeight(IntermediaryResult.Filtered.CocRadius, History.CocRadius);
BlendFinal = lerp(BlendFinal, 1, (1-BilateralWeight)*CoCBilateralFilterStrength);
}
#endif
if (bCameraCut)
{
BlendFinal = 1.0;
}
}
// Offscreen feedback resets.
if (IgnoreHistory)
{
History = IntermediaryResult.Filtered;
#if HISTORY_PAYLOAD_COMPONENTS == 3
History.Color.a = 0.0;
#endif
}
// DO FINAL BLEND BETWEEN HISTORY AND FILTERED COLOR
// -------------------------------------------------
// Luma weighted blend
float FilterWeight = GetSceneColorHdrWeight(InputParams, IntermediaryResult.Filtered.Color.x);
float HistoryWeight = GetSceneColorHdrWeight(InputParams, History.Color.x);
FTAAHistoryPayload OutputPayload;
{
float2 Weights = WeightedLerpFactors(HistoryWeight, FilterWeight, BlendFinal);
OutputPayload = AddPayload(MulPayload(History, Weights.x), MulPayload(IntermediaryResult.Filtered, Weights.y));
}
// Ensure that alpha values that are expected to be opaque (but are only close to opaque) are forced to be opaque.
// (0.995 chosen to accommodate handling of 254/255)
if (OutputPayload.Color.a > 0.995)
{
OutputPayload.Color.a = 1;
}
OutputPayload.Color = TransformBackToRawLinearSceneColor(OutputPayload.Color);
#if AA_NAN
// Transform NaNs to black, transform negative colors to black.
OutputPayload.Color = -min(-OutputPayload.Color, 0.0);
OutputPayload.CocRadius = isnan(OutputPayload.CocRadius) ? 0.0 : OutputPayload.CocRadius;
#endif
#if HISTORY_PAYLOAD_COMPONENTS == 3
#if AA_DYNAMIC_ANTIGHOST && AA_DYNAMIC
OutputPayload.Color.a = Dynamic4 ? 1 : 0;
#else
// Zero out to remove any prior computation of alpha
OutputPayload.Color.a = 0;
#endif
#endif
return OutputPayload;
}
//------------------------------------------------------- ENTRY POINTS
#if COMPUTESHADER
/**
 * Compute shader entry point.
 *
 * Each thread evaluates TemporalAASample() for one output pixel, packs the returned
 * payload into up to two output textures according to AA_HISTORY_PAYLOAD, and stores
 * it when the pixel lies inside OutputViewportRect. With TAA_DOWNSAMPLE, a 2x2 average
 * is additionally written to OutComputeTexDownsampled.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void MainCS(
	uint2 DispatchThreadId : SV_DispatchThreadID,
	uint2 GroupId : SV_GroupID,
	uint2 GroupThreadId : SV_GroupThreadID,
	uint GroupThreadIndex : SV_GroupIndex)
{
	// Center of the pixel handled by this thread, in output pixel units.
	const float2 PixelCenter = float2(DispatchThreadId) + 0.5f;

	#if AA_LOWER_RESOLUTION
		// Lower resolution permutation: use the dedicated scale and clamp to the maximum viewport UV.
		float2 ViewportUV = min(
			PixelCenter * MaxViewportUVAndSvPositionToViewportUV.zw,
			MaxViewportUVAndSvPositionToViewportUV.xy);
	#else
		float2 ViewportUV = PixelCenter * OutputViewportSize.zw;
	#endif

	float ExposureScale = EyeAdaptationLookup();

	FTAAHistoryPayload Payload = TemporalAASample(GroupId, GroupThreadId, GroupThreadIndex, ViewportUV, ExposureScale);

	// Pack the payload into the output layout selected by AA_HISTORY_PAYLOAD.
	#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC
		// RGB in color, CocRadius in alpha.
		float4 PackedOutput0 = float4(Payload.Color.rgb, Payload.CocRadius);
	#else
		float4 PackedOutput0 = Payload.Color;
	#endif

	float4 PackedOutput1 = 0;
	#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
		// Alpha of the first output is already in use, so CocRadius goes to the second output's red.
		PackedOutput1.r = Payload.CocRadius;
	#endif

	uint2 PixelPos = DispatchThreadId + OutputViewportRect.xy;

	// Only store for threads whose pixel passes the viewport rect test.
	const bool bInsideViewport = all(PixelPos < OutputViewportRect.zw);
	if (bInsideViewport)
	{
		// Clamp against MaxHalfFloat before storing (presumably to stay within the half float range of the target).
		float4 ClampedOutput0 = min(MaxHalfFloat.xxxx, PackedOutput0);

		#if AA_ENABLE_STOCASTIC_QUANTIZATION
			ClampedOutput0.rgb = QuantizeFloatColor(ClampedOutput0.rgb, OutputQuantizationError, PixelPos, QUANTIZE_NOISE_HAMMERSLEY);
		#endif

		OutComputeTex_0[PixelPos] = ClampedOutput0;

		#if HISTORY_RENDER_TARGETS == 2
			OutComputeTex_1[PixelPos] = PackedOutput1;
		#endif
	}

	#if TAA_DOWNSAMPLE
	{
		// This permutation also emits a half resolution image alongside the full resolution one,
		// which is more efficient than running a separate downsampling pass afterwards.
		// Assumes output resolution is always even.

		// Slot of this thread in the group wide array, row major over the thread group.
		const uint SlotSelf = GroupThreadId.y * THREADGROUP_SIZEX + GroupThreadId.x;
		const uint SlotRight = SlotSelf + 1;
		const uint SlotBelow = SlotSelf + THREADGROUP_SIZEX;
		const uint SlotBelowRight = SlotBelow + 1;

		// Note: the unclamped, unquantized value is what gets shared with the other threads.
		GroupSharedDownsampleArray[SlotSelf] = PackedOutput0;

		GroupMemoryBarrierWithGroupSync();

		// Only the thread at the even/even corner of each 2x2 quad writes the averaged result.
		const bool bIsQuadCorner = all((GroupThreadId & 1) == 0);
		if (bIsQuadCorner)
		{
			float4 QuadSum = PackedOutput0;
			QuadSum += GroupSharedDownsampleArray[SlotRight];
			QuadSum += GroupSharedDownsampleArray[SlotBelow];
			QuadSum += GroupSharedDownsampleArray[SlotBelowRight];

			OutComputeTexDownsampled[PixelPos / 2] = QuadSum * 0.25;
		}
	}
	#endif //TAA_DOWNSAMPLE
}
#elif PIXELSHADER // Mobile Only, supports Main and MainUpsampling
/**
 * Pixel shader entry point.
 *
 * Evaluates TemporalAASample() for the pixel at SvPosition and outputs the color of
 * the returned payload to SV_Target0. Only the payload's Color is written here.
 */
void MainPS(
	float4 SvPosition : SV_POSITION,
	out float4 OutColor0 : SV_Target0
	)
{
	// There is no thread group in a pixel shader, so zeros are passed for the group related arguments.
	uint2 NullGroupId = uint2(0, 0);
	uint2 NullGroupThreadId = uint2(0, 0);
	uint NullGroupThreadIndex = 0;

	// Position relative to the output viewport origin, scaled by OutputViewportSize.zw.
	float2 ViewportUV = (SvPosition.xy - OutputViewportRect.xy) * OutputViewportSize.zw;

	float ExposureScale = EyeAdaptationLookup();

	FTAAHistoryPayload Payload = TemporalAASample(NullGroupId, NullGroupThreadId, NullGroupThreadIndex, ViewportUV, ExposureScale);

	OutColor0 = Payload.Color;
}
#endif