// Copyright Epic Games, Inc. All Rights Reserved.

#define EYE_ADAPTATION_LOOSE_PARAMETERS 1

#include "Common.ush"
#include "Random.ush"
#include "EyeAdaptationCommon.ush"
#include "TextureSampling.ush"
#include "MonteCarlo.ush"
#include "Quantization.ush"


//------------------------------------------------------- COMPILER CONFIG

// Promote vector truncation warnings to errors.
#pragma warning(error: 3206)


//------------------------------------------------------- ENUM VALUES

/** Payload of the history. History might still have additional TAA internals. */
	// Only have RGB.
	#define HISTORY_PAYLOAD_RGB 0

	// Have RGB and translucency in alpha.
	#define HISTORY_PAYLOAD_RGB_TRANSLUCENCY 1

	// Have RGB and opacity in alpha (alias of HISTORY_PAYLOAD_RGB_TRANSLUCENCY).
	#define HISTORY_PAYLOAD_RGB_OPACITY (HISTORY_PAYLOAD_RGB_TRANSLUCENCY)

	// Have RGB and DOF's CocRadius in alpha.
	#define HISTORY_PAYLOAD_RGB_COC 2

	// Have RGB, opacity in alpha and DOF's CocRadius in separate's Red.
	#define HISTORY_PAYLOAD_RGB_OPACITY_COC 3


/** Caching method for scene color. */
	// Disable any in code cache.
	#define AA_SAMPLE_CACHE_METHOD_DISABLE 0

	// Caches 3x3 neighborhood into VGPR (although it may have corners optimised away).
	#define AA_SAMPLE_CACHE_METHOD_VGPR_3X3 1

	// Prefetches scene color into 10x10 LDS tile (8x8 when screen percentage < 71%).
	#define AA_SAMPLE_CACHE_METHOD_LDS 2


/** Clamping method for scene color. */
	// Min max of neighboring samples.
	#define HISTORY_CLAMPING_BOX_MIN_MAX 0

	// Variance computed from neighboring samples.
	#define HISTORY_CLAMPING_BOX_VARIANCE 1

	// Min max samples that are within distance from output pixel.
	#define HISTORY_CLAMPING_BOX_SAMPLE_DISTANCE 2


/** Qualities. Note the numeric values are not ordered by quality: MEDIUM_HIGH is 3. */
	#define TAA_QUALITY_LOW 0
	#define TAA_QUALITY_MEDIUM 1
	#define TAA_QUALITY_HIGH 2
	#define TAA_QUALITY_MEDIUM_HIGH 3


//------------------------------------------------------- CONFIGS

// Compute shaders always do responsive TAA in a single pass.
#if COMPUTESHADER
	#define AA_SINGLE_PASS_RESPONSIVE SHADING_PATH_DEFERRED //The StencilTexture is not available on the mobile platform
#endif


// Per-pass configuration. Every branch must define AA_HISTORY_PAYLOAD; any AA_* switch
// left undefined here falls back to the defaults declared after this chain.
#if TAA_PASS_CONFIG == 0 // Main
	#if TAA_ALPHA_CHANNEL
		#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_TRANSLUCENCY)
	#else
		#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB)
	#endif

	#define AA_BICUBIC 1
	#define AA_CROSS 2
	#define AA_DYNAMIC 1
	#define AA_MANUALLY_CLAMP_HISTORY_UV 1
	#define AA_TONE 1
	#define AA_YCOCG 1

	#if TAA_QUALITY == TAA_QUALITY_LOW
		#define AA_FILTERED 0
		#define AA_DYNAMIC_ANTIGHOST 0
	#elif TAA_QUALITY == TAA_QUALITY_MEDIUM
		#define AA_FILTERED 1
		#define AA_DYNAMIC_ANTIGHOST 0
	#elif TAA_QUALITY == TAA_QUALITY_HIGH
		#define AA_FILTERED 1
		#define AA_DYNAMIC_ANTIGHOST 1
	#elif TAA_QUALITY == TAA_QUALITY_MEDIUM_HIGH
		#define AA_FILTERED 1
		#define AA_DYNAMIC_ANTIGHOST 1
	#else
		#error Unknown TAA quality
	#endif

	#if COMPUTESHADER
		#if AA_MOBILE_CONFIG
			#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_DISABLE) // the shared memory is not as efficient as expected on mobile devices, try not to use it on mobile devices.
		#else
			#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_LDS)
		#endif
	#endif

#elif TAA_PASS_CONFIG == 7 // Used for Hair
	#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_OPACITY)
	#define AA_DYNAMIC 1
	#define AA_FILTERED 0
	#define AA_LERP 3
	#define AA_MANUALLY_CLAMP_HISTORY_UV 1
	#define AA_YCOCG 0

#elif TAA_PASS_CONFIG == 3 // Used for SSR
	#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_OPACITY)
	#define AA_DYNAMIC 1
	#define AA_FILTERED 1
	#define AA_LERP 8
	#define AA_MANUALLY_CLAMP_HISTORY_UV 1
	#define AA_YCOCG 1

#elif TAA_PASS_CONFIG == 4 // Used for LightShaft
	#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_OPACITY)
	#define AA_FILTERED 1
	#define AA_LERP 64
	#define AA_MANUALLY_CLAMP_HISTORY_UV 1
	#define AA_YCOCG 1
	#define AA_LOWER_RESOLUTION 1

#elif TAA_PASS_CONFIG == 1 || TAA_PASS_CONFIG == 2 // MainUpsampling & MainSuperSampling
	#if TAA_ALPHA_CHANNEL
		#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_TRANSLUCENCY)
	#else
		#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB)
	#endif

	#define AA_BICUBIC 1
	#define AA_CROSS 1
	#define AA_DYNAMIC 1
	#define AA_MANUALLY_CLAMP_HISTORY_UV 1
	#define AA_TONE 1
	#if SWITCH_PROFILE || SWITCH_PROFILE_FORWARD
		#define AA_YCOCG 0
	#else
		#define AA_YCOCG 1
	#endif
	#define AA_UPSAMPLE 1
	#define AA_UPSAMPLE_ADAPTIVE_FILTERING 1

	#if TAA_QUALITY == TAA_QUALITY_LOW
		#define AA_FILTERED 0
		#if AA_MOBILE_CONFIG
			#define AA_SAMPLES 5
		#else
			#define AA_SAMPLES 6
		#endif
	#elif TAA_QUALITY == TAA_QUALITY_MEDIUM
		#define AA_FILTERED 1
		#if AA_MOBILE_CONFIG
			#define AA_SAMPLES 5
		#else
			#define AA_SAMPLES 6
		#endif
	#elif TAA_QUALITY == TAA_QUALITY_HIGH
		#define AA_HISTORY_CLAMPING_BOX (HISTORY_CLAMPING_BOX_SAMPLE_DISTANCE)
		#define AA_FILTERED 1
		#define AA_DYNAMIC_ANTIGHOST 1
		#define AA_SAMPLES 9
	#elif TAA_QUALITY == TAA_QUALITY_MEDIUM_HIGH
		#define AA_FILTERED 1
		#if AA_MOBILE_CONFIG
			#define AA_SAMPLES 5
		#else
			#define AA_SAMPLES 6
		#endif
		#define AA_DYNAMIC_ANTIGHOST 1
	#else
		#error Unknown TAA quality
	#endif

	#if COMPUTESHADER
		// Do not use LDS caching for screen percentage > 100% or < 50%.
		#if TAA_SCREEN_PERCENTAGE_RANGE == 2 || TAA_SCREEN_PERCENTAGE_RANGE == 3
			#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_VGPR_3X3)
		#else
			#if AA_MOBILE_CONFIG
				#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_DISABLE) // the shared memory is not as efficient as expected on mobile devices, try not to use it on mobile devices.
			#else
				#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_LDS)
			#endif
		#endif
	#endif

#elif TAA_PASS_CONFIG == 5 || TAA_PASS_CONFIG == 6 // Used for diaphragm DOF pre-filtering.
	#if TAA_ALPHA_CHANNEL
		#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_OPACITY_COC)
	#else
		#define AA_HISTORY_PAYLOAD (HISTORY_PAYLOAD_RGB_COC)
	#endif

	#define AA_CROSS 4 // because running at half res.
	#define AA_DYNAMIC 1
	#define AA_FORCE_ALPHA_CLAMP 1
	#define AA_MANUALLY_CLAMP_HISTORY_UV 1
	#define AA_LOWER_RESOLUTION 1

	// Only pass config 6 upsamples.
	#if TAA_PASS_CONFIG == 6
		#define AA_UPSAMPLE 1
		#define AA_UPSAMPLE_ADAPTIVE_FILTERING 1
	#else
		#define AA_UPSAMPLE 0
	#endif

	// Note: TAA_QUALITY_LOW has no branch here and hits the #error below.
	#if TAA_QUALITY == TAA_QUALITY_MEDIUM
		#if AA_UPSAMPLE
			#define AA_BICUBIC 1
			#define AA_FILTERED 1
		#endif
	#elif TAA_QUALITY == TAA_QUALITY_HIGH
		#define AA_BICUBIC 1
		#define AA_FILTERED 1
		#define AA_YCOCG 1
	#elif TAA_QUALITY == TAA_QUALITY_MEDIUM_HIGH
		#if AA_UPSAMPLE
			#define AA_BICUBIC 1
			#define AA_FILTERED 1
		#endif
	#else
		#error Unknown TAA quality
	#endif

	#if TAA_SCREEN_PERCENTAGE_RANGE != 2 && AA_UPSAMPLE
		//#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_LDS) // TODO: doesn't work yet.
	#endif

#else
	#error Unknown TAA pass config. Have you changed ETAAPassConfig without updating me?
#endif


#if TAA_SCREEN_PERCENTAGE_RANGE == 2
	#define AA_DOWNSAMPLE 1
#else
	#define AA_DOWNSAMPLE 0
#endif


//------------------------------------------------------- CONFIG DISABLED DEFAULTS

// Num samples of current frame
#ifndef AA_SAMPLES
	#define AA_SAMPLES 5
#endif

// 1 = Use tighter AABB clamp for history.
// 0 = Use simple min/max clamp.
#ifndef AA_CLIP
	#define AA_CLIP 0
#endif

// Cross distance in pixels used in depth search X pattern.
// 0 = Turn this feature off.
// 2 = Is required for standard temporal AA pass.
#ifndef AA_CROSS
	#define AA_CROSS 0
#endif

// 1 = Use dynamic motion.
// 0 = Skip dynamic motion, currently required for half resolution passes.
#ifndef AA_DYNAMIC
	#define AA_DYNAMIC 0
#endif

// 0 = Dynamic motion based lerp value (default).
// non-zero = Use 1/LERP fixed lerp value (used for reflections).
#ifndef AA_LERP
	#define AA_LERP 0
#endif

// 1 = Use higher quality round clamp.
// 0 = Use lower quality but faster box clamp.
#ifndef AA_ROUND
	#define AA_ROUND 0
#endif

// Force clamp on alpha.
#ifndef AA_FORCE_ALPHA_CLAMP
	#define AA_FORCE_ALPHA_CLAMP 0
#endif

// Use YCoCg path.
#ifndef AA_YCOCG
	#define AA_YCOCG 0
#endif

// Bicubic filter history
#ifndef AA_BICUBIC
	#define AA_BICUBIC 0
#endif

// Tone map to kill fireflies
#ifndef AA_TONE
	#define AA_TONE 0
#endif

// Antighosting using dynamic mask
#ifndef AA_DYNAMIC_ANTIGHOST
	#define AA_DYNAMIC_ANTIGHOST 0
#endif

// Sample the stencil buffer inline rather than multiple masked passes.
#ifndef AA_SINGLE_PASS_RESPONSIVE
	#define AA_SINGLE_PASS_RESPONSIVE 0
#endif

// Upsample the output.
#ifndef AA_UPSAMPLE
	#define AA_UPSAMPLE 0
#endif

// Method used for generating the history clamping box.
#ifndef AA_HISTORY_CLAMPING_BOX
	#define AA_HISTORY_CLAMPING_BOX (HISTORY_CLAMPING_BOX_MIN_MAX)
#endif

// Change the upsampling filter size when history is rejected, which reduces blocky output pixels.
#ifndef AA_UPSAMPLE_ADAPTIVE_FILTERING
	#define AA_UPSAMPLE_ADAPTIVE_FILTERING 0
#endif

// Whether this pass runs at lower resolution than main view rectangle.
#ifndef AA_LOWER_RESOLUTION
	#define AA_LOWER_RESOLUTION 0
#endif

// Whether the history buffer UV should be manually clamped.
#ifndef AA_MANUALLY_CLAMP_HISTORY_UV
	#define AA_MANUALLY_CLAMP_HISTORY_UV 0
#endif


//------------------------------------------------------- CONFIG ENABLED DEFAULTS

// Always enable scene color filtering
// 1 = Use filtered sample.
// 0 = Use center sample.
#ifndef AA_FILTERED
	#define AA_FILTERED 1
#endif

// Always enable AA_NAN to avoid all NaN in all TAA passes, which is more convenient considering the amount of / 0 we can have.
// 0 = Don't use.
// 1 = Use extra clamp to avoid NANs
#ifndef AA_NAN
	#define AA_NAN 1
#endif

// Neighborhood clamping. Disable for testing reprojection. Always enabled, well because TAA is totally broken otherwise.
#ifndef AA_CLAMP
	#define AA_CLAMP 1
#endif

// By default, compute shaders cache neighboring samples into VGPR; other shaders do not cache.
#ifndef AA_SAMPLE_CACHE_METHOD
	#if COMPUTESHADER
		#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_VGPR_3X3)
	#else
		#define AA_SAMPLE_CACHE_METHOD (AA_SAMPLE_CACHE_METHOD_DISABLE)
	#endif
#endif

// By default, enable stochastic quantization of the output.
#ifndef AA_ENABLE_STOCASTIC_QUANTIZATION
	#define AA_ENABLE_STOCASTIC_QUANTIZATION 1
#endif


//------------------------------------------------------- MANDATORY CONFIG

#ifndef AA_HISTORY_PAYLOAD
	#error You forgot to defines the history payload.
#endif


//------------------------------------------------------- DERIVES

// Defines number of components in history payload:
// 3 for RGB only, 5 for RGB + opacity + CoC, 4 for every other payload.
#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB
	#define HISTORY_PAYLOAD_COMPONENTS 3
#elif AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
	#define HISTORY_PAYLOAD_COMPONENTS 5
#else
	#define HISTORY_PAYLOAD_COMPONENTS 4
#endif

// Defines the number of render targets to store TAA's history.
#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
	#define HISTORY_RENDER_TARGETS 2
#else
	#define HISTORY_RENDER_TARGETS 1
#endif


//------------------------------------------------------- CONFIG CHECKS

#if AA_SAMPLES != 9 && AA_SAMPLES != 5 && AA_SAMPLES != 6
	#error Samples must be 5, (6 for TAAU) or 9
#endif

#if AA_SAMPLE_CACHE_METHOD >= 2 && !COMPUTESHADER
	#error Group share only for compute shader.
#endif


//------------------------------------------------------- CONSTANTS

// K = Center of the nearest input pixel.
// O = Center of the output pixel.
//
//          |           |
//    0     |     1     |     2
//          |           |
//          |           |
//  --------+-----------+--------
//          |           |
//          | O         |
//    3     |     K     |     5
//          |           |
//          |           |
//  --------+-----------+--------
//          |           |
//          |           |
//    6     |     7     |     8
//          |           |
//
static const int2 kOffsets3x3[9] =
{
	int2(-1, -1),
	int2( 0, -1),
	int2( 1, -1),
	int2(-1,  0),
	int2( 0,  0), // K
	int2( 1,  0),
	int2(-1,  1),
	int2( 0,  1),
	int2( 1,  1),
};

// Indexes of the 3x3 square.
static const uint kSquareIndexes3x3[9] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };

// Indexes of the offsets to have plus + shape.
static const uint kPlusIndexes3x3[5] = { 1, 3, 4, 5, 7 };

// Number of neighbors.
static const uint kNeighborsCount = 9;

#if AA_UPSAMPLE

// T = Center of the nearest top left input pixel.
// O = Center of the output pixel.
//
//          |
//    T     |     .
//          |
//       O  |
//  --------+--------
//          |
//          |
//    .     |     .
//          |
static const int2 Offsets2x2[4] =
{
	int2( 0, 0), // T
	int2( 1, 0),
	int2( 0, 1),
	int2( 1, 1),
};

// Indexes of the 2x2 square.
static const uint SquareIndexes2x2[4] = { 0, 1, 2, 3 };

#endif // AA_UPSAMPLE


//------------------------------------------------------- PARAMETERS

// NOTE(review): none of the texture declarations below carry an element type. RWTexture2D
// normally requires one (e.g. RWTexture2D<float4>); the template arguments may have been
// lost from this copy of the file -- confirm against the upstream shader.

float HistoryPreExposureCorrection;
float CurrentFrameWeight;
int bCameraCut;

DECLARE_SCALAR_ARRAY(float, SampleWeights, 9);
DECLARE_SCALAR_ARRAY(float, PlusWeights, 5);

float4 ViewportUVToInputBufferUV;
float4 MaxViewportUVAndSvPositionToViewportUV;
float2 ScreenPosAbsMax;
float4 ScreenPosToHistoryBufferUV;

float4 InputSceneColorSize;
int2 InputMinPixelCoord;
int2 InputMaxPixelCoord;
Texture2D InputSceneColor;
SamplerState InputSceneColorSampler;
Texture2D InputSceneMetadata;
SamplerState InputSceneMetadataSampler;

Texture2D SceneDepthTexture;
SamplerState SceneDepthTextureSampler;

#if COMPILER_GLSL_ES3_1
Texture2D GBufferVelocityTextureSRV;
#else
Texture2D GBufferVelocityTexture;
SamplerState GBufferVelocityTextureSampler;
#endif

Texture2D StencilTexture;

Texture2D HistoryBuffer_0;
Texture2D HistoryBuffer_1;
SamplerState HistoryBufferSampler_0;
SamplerState HistoryBufferSampler_1;
float4 HistoryBufferSize;
float4 HistoryBufferUVMinMax;

float CoCBilateralFilterStrength;

float4 OutputViewportSize;
float4 OutputViewportRect;
float3 OutputQuantizationError;

#if COMPUTESHADER
	RWTexture2D OutComputeTex_0;

	#if TAA_DOWNSAMPLE
		// This shader permutation outputs half resolution image in addition to main full-res one.
		// It is more efficient than performing a separate downsampling pass afterwards.
		RWTexture2D OutComputeTexDownsampled;
		groupshared float4 GroupSharedDownsampleArray[THREADGROUP_SIZEX*THREADGROUP_SIZEY]; // TODO: share this with GroupSharedArrayF4 when possible
	#endif // TAA_DOWNSAMPLE

	#if HISTORY_RENDER_TARGETS == 2
		RWTexture2D OutComputeTex_1;
	#endif // HISTORY_RENDER_TARGETS == 2
#endif

// Temporal upsample specific params.
#if AA_UPSAMPLE
	float2 InputViewMin;
	float4 InputViewSize;

	// Temporal jitter at the pixel scale.
	float2 TemporalJitterPixels;
	float ScreenPercentage;
	float UpscaleFactor; // = 1 / ScreenPercentage
#endif // AA_UPSAMPLE


//------------------------------------------------------- FUNCTIONS

#if COMPILER_SUPPORTS_HLSL2021
// Scales X in place by HistoryPreExposureCorrection. Generic over any type that supports
// multiplication by a float.
template<typename T>
void CorrectExposure(inout T X)
{
	X *= HistoryPreExposureCorrection;
}
#endif

// Fetches the encoded velocity at BufferUV, offset by PixelOffset texels.
// The GLSL ES 3.1 path converts the UV to an integer texel coordinate and uses Load;
// every other platform uses SampleLevel at mip 0 with a texel offset.
ENCODED_VELOCITY_TYPE SampleVelocityTexture(float2 BufferUV, int2 PixelOffset = int2(0, 0))
{
#if COMPILER_GLSL_ES3_1
	int2 Coord = int2(BufferUV * InputSceneColorSize.xy) + PixelOffset;
	return GBufferVelocityTextureSRV.Load(int3(Coord, 0));
#else
	return GBufferVelocityTexture.SampleLevel(GBufferVelocityTextureSampler, BufferUV, 0, PixelOffset);
#endif
}

// Converts RGB to YCoCg. The result is scaled by 4 (integer weights, no normalisation);
// YCoCgToRGB() undoes the scale.
float3 RGBToYCoCg( float3 RGB )
{
	float Y  = dot( RGB, float3(  1, 2,  1 ) );
	float Co = dot( RGB, float3(  2, 0, -2 ) );
	float Cg = dot( RGB, float3( -1, 2, -1 ) );

	float3 YCoCg = float3( Y, Co, Cg );
	return YCoCg;
}

// Inverse of RGBToYCoCg(): removes the x4 scale and recombines the channels into RGB.
float3 YCoCgToRGB( float3 YCoCg )
{
	float Y  = YCoCg.x * 0.25;
	float Co = YCoCg.y * 0.25;
	float Cg = YCoCg.z * 0.25;

	float R = Y + Co - Cg;
	float G = Y + Cg;
	float B = Y - Co - Cg;

	float3 RGB = float3( R, G, B );
	return RGB;
}

// Faster but less accurate luma computation.
// Luma includes a scaling by 4.
float Luma4(float3 Color)
{
	return (Color.g * 2.0) + (Color.r + Color.b);
}

// Optimized HDR weighting function.
float HdrWeight4(float3 Color, float Exposure)
{
	return rcp(Luma4(Color) * Exposure + 4.0);
}

// Same weighting as HdrWeight4(), for a value that already is a x4-scaled luma
// (the Y channel produced by RGBToYCoCg()).
float HdrWeightY(float Color, float Exposure)
{
	return rcp(Color * Exposure + 4.0);
}

// Intersect ray with AABB, knowing there is an intersection.
// Dir = Ray direction.
// Org = Start of the ray.
// Box = Box is at {0,0,0} with this size.
// Returns distance on line segment.
float IntersectAABB(float3 Dir, float3 Org, float3 Box)
{
	#if PS4_PROFILE
		// This causes flicker, it should only be used on PS4 until proper fix is in.
		if(min(min(abs(Dir.x), abs(Dir.y)), abs(Dir.z)) < (1.0/65536.0)) return 1.0;
	#endif

	float3 RcpDir = rcp(Dir);
	float3 TNeg = (  Box  - Org) * RcpDir;
	float3 TPos = ((-Box) - Org) * RcpDir;
	return max(max(min(TNeg.x, TPos.x), min(TNeg.y, TPos.y)), min(TNeg.z, TPos.z));
}

// Returns the ray parameter at which the ray from History towards Filtered enters the
// [NeighborMin, NeighborMax] box. The result is not clamped in the active code path.
float HistoryClip(float3 History, float3 Filtered, float3 NeighborMin, float3 NeighborMax)
{
#if 0
	float3 Min = min(Filtered, min(NeighborMin, NeighborMax));
	float3 Max = max(Filtered, max(NeighborMin, NeighborMax));
	float3 Avg2 = Max + Min;
	float3 Dir = Filtered - History;
	float3 Org = History - Avg2 * 0.5;
	float3 Scale = Max - Avg2 * 0.5;
	return saturate(IntersectAABB(Dir, Org, Scale));
#else
	float3 BoxMin = NeighborMin;
	float3 BoxMax = NeighborMax;
	//float3 BoxMin = min( Filtered, NeighborMin );
	//float3 BoxMax = max( Filtered, NeighborMax );

	float3 RayOrigin = History;
	float3 RayDir = Filtered - History;
	// Replace near-zero direction components with a small positive epsilon before taking the reciprocal.
	RayDir = select(abs( RayDir ) < (1.0/65536.0), (1.0/65536.0), RayDir);
	float3 InvRayDir = rcp( RayDir );

	float3 MinIntersect = (BoxMin - RayOrigin) * InvRayDir;
	float3 MaxIntersect = (BoxMax - RayOrigin) * InvRayDir;
	float3 EnterIntersect = min( MinIntersect, MaxIntersect );
	return max3( EnterIntersect.x, EnterIntersect.y, EnterIntersect.z );
#endif
}

// Returns the normalised (sum to 1) blend factors for A and B, where the plain lerp factors
// (1 - Blend, Blend) are first scaled by WeightA and WeightB respectively.
float2 WeightedLerpFactors(float WeightA, float WeightB, float Blend)
{
	float BlendA = (1.0 - Blend) * WeightA;
	float BlendB = Blend * WeightB;
	float RcpBlend = rcp(BlendA + BlendB);
	BlendA *= RcpBlend;
	BlendB *= RcpBlend;
	return float2(BlendA, BlendB);
}

#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC || AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC

// Computes the bilateral weight according to two Coc radii.
// The absolute difference is divided by |RefCocRadius| only when that exceeds 1.
float ComputeBilateralWeight(float RefCocRadius, float SampleCocRadius)
{
	float Factor = (abs(RefCocRadius) > 1 ? rcp(abs(RefCocRadius)) : 1.0);
	return saturate(1 - abs(RefCocRadius - SampleCocRadius) * Factor);
}

// Variant of ComputeBilateralWeight() using the signed difference (Center - Sample): samples
// whose CoC radius is greater than or equal to the center's always get full weight.
float ComputeNeightborSampleBilateralWeight(float CenterCocRadius, float SampleCocRadius)
{
	float Factor = (abs(CenterCocRadius) > 1 ? rcp(abs(CenterCocRadius)) : 1.0);
	return saturate(1 - (CenterCocRadius - SampleCocRadius) * Factor);
}

#endif


//------------------------------------------------------- HISTORY's PAYLOAD

// Payload of the TAA's history.
struct FTAAHistoryPayload
{
	// Transformed scene color and alpha channel.
	float4 Color;

	// Radius of the circle of confusion for DOF.
	float CocRadius;
};

// Returns Payload with every member multiplied by x.
FTAAHistoryPayload MulPayload(in FTAAHistoryPayload Payload, in float x)
{
	Payload.Color *= x;
	Payload.CocRadius *= x;
	return Payload;
}

// Returns the member-wise sum of two payloads.
FTAAHistoryPayload AddPayload(in FTAAHistoryPayload Payload0, in FTAAHistoryPayload Payload1)
{
	Payload0.Color += Payload1.Color;
	Payload0.CocRadius += Payload1.CocRadius;
	return Payload0;
}

// Returns the member-wise minimum of two payloads.
FTAAHistoryPayload MinPayload(in FTAAHistoryPayload Payload0, in FTAAHistoryPayload Payload1)
{
	Payload0.Color = min(Payload0.Color, Payload1.Color);
	Payload0.CocRadius = min(Payload0.CocRadius, Payload1.CocRadius);
	return Payload0;
}

// Returns the member-wise maximum of two payloads.
FTAAHistoryPayload MaxPayload(in FTAAHistoryPayload Payload0, in FTAAHistoryPayload Payload1)
{
	Payload0.Color = max(Payload0.Color, Payload1.Color);
	Payload0.CocRadius = max(Payload0.CocRadius, Payload1.CocRadius);
	return Payload0;
}

// Returns the member-wise minimum of three payloads.
FTAAHistoryPayload MinPayload3(in FTAAHistoryPayload Payload0, in FTAAHistoryPayload Payload1, in FTAAHistoryPayload Payload2)
{
	Payload0.Color = min3(Payload0.Color, Payload1.Color, Payload2.Color);
	Payload0.CocRadius = min3(Payload0.CocRadius, Payload1.CocRadius, Payload2.CocRadius);
	return Payload0;
}

// Returns the member-wise maximum of three payloads.
FTAAHistoryPayload MaxPayload3(in FTAAHistoryPayload Payload0, in FTAAHistoryPayload Payload1, in FTAAHistoryPayload Payload2)
{
	Payload0.Color = max3(Payload0.Color, Payload1.Color, Payload2.Color);
	Payload0.CocRadius = max3(Payload0.CocRadius, Payload1.CocRadius, Payload2.CocRadius);
	return Payload0;
}


//------------------------------------------------------- TAA INTERMEDIARY STRUCTURES

// Output pixel parameters. Should not be modified once setup.
struct FTAAInputParameters
{
	// Compute shader dispatch params, set to 0 in pixel shader.
	uint2 GroupId;
	uint2 GroupThreadId;
	uint GroupThreadIndex;

	// Viewport UV of the output pixel.
	float2 ViewportUV;

	// Position of the output pixel on screen.
	float2 ScreenPos;

	// Buffer UV of the nearest input pixel.
	float2 NearestBufferUV;

	#if AA_UPSAMPLE
		// Buffer UV of the nearest top left input pixel.
		float2 NearestTopLeftBufferUV;
	#endif

	// Whether this pixel should be responsive.
	float bIsResponsiveAAPixel;

	// Frame exposure's scale.
	float FrameExposureScale;

	// Cache of neighbors' transformed scene color.
	#if AA_SAMPLE_CACHE_METHOD == AA_SAMPLE_CACHE_METHOD_VGPR_3X3
		float4 CachedNeighbors0[kNeighborsCount];
		#if HISTORY_RENDER_TARGETS == 2
			float CachedNeighbors1[kNeighborsCount];
		#endif
	#endif
};

// Intermediary results for major functions to conveniently share values between them.
//
// It is allowed to pass this down to a major function with still uninitialized variables.
struct FTAAIntermediaryResult
{
	// The filtered input.
	FTAAHistoryPayload Filtered;

	// Temporal weight of the filtered input.
	float FilteredTemporalWeight;

	// 1 / filtering kernel scale factor for AA_UPSAMPLE_ADAPTIVE_FILTERING.
	float InvFilterScaleFactor;
};

// Create intermediary result.
// Every member is first filled with 1.0 / 0.0 (presumably so that reading a field that was
// never set stands out), then the two scalar weights are reset to 1.
FTAAIntermediaryResult CreateIntermediaryResult()
{
	// Disable warning X4008: floating point division by zero
	#pragma warning(disable:4008)
	FTAAIntermediaryResult IntermediaryResult = (FTAAIntermediaryResult) (1.0 / 0.0);
	#pragma warning(default:4008)

	IntermediaryResult.FilteredTemporalWeight = 1;
	IntermediaryResult.InvFilterScaleFactor = 1;
	return IntermediaryResult;
}

// Transformed scene color's data for a sample.
struct FTAASceneColorSample
{
	// Transformed scene color and alpha channel.
	float4 Color;

	// Radius of the circle of confusion for DOF.
	float CocRadius;

	// HDR weight of the scene color sample.
	float HdrWeight;
};


//------------------------------------------------------- SCENE COLOR SPACE MANAGEMENT

// Transform RAW linear scene color RGB to TAA's working color space.
// Alpha is passed through untouched.
float4 TransformSceneColor(float4 RawLinearSceneColorRGBA)
{
	#if AA_YCOCG
		return float4(RGBToYCoCg(RawLinearSceneColorRGBA.rgb), RawLinearSceneColorRGBA.a);
	#else
		return RawLinearSceneColorRGBA;
	#endif
}

// Inverse of TransformSceneColor().
float4 TransformBackToRawLinearSceneColor(float4 SceneColor)
{
	#if AA_YCOCG
		return float4(YCoCgToRGB(SceneColor.xyz), SceneColor.a);
	#else
		return SceneColor;
	#endif
}

// Transform current frame's RAW scene color RGB to TAA's working color space.
float4 TransformCurrentFrameSceneColor(float4 RawSceneColorRGBA)
{
	return TransformSceneColor(RawSceneColorRGBA);
}

// Get the Luma4 of the scene color (already stored in .x on the YCoCg path).
float GetSceneColorLuma4(float4 SceneColor)
{
	#if AA_YCOCG
		return SceneColor.x;
	#else
		return Luma4(SceneColor.rgb);
	#endif
}

// Get the HDR weight of the transformed scene color.
float GetSceneColorHdrWeight(in FTAAInputParameters InputParams, float4 SceneColor)
{
	#if AA_YCOCG
		return HdrWeightY(SceneColor.x, InputParams.FrameExposureScale);
	#else
		return HdrWeight4(SceneColor.rgb, InputParams.FrameExposureScale);
	#endif
}


//------------------------------------------------------- INPUT SAMPLE CACHING.

// API to sample input scene color and depth through caching system.
//
// Precache scene color or depth:
//		PrecacheInputSceneColor(InputParams);
//		PrecacheInputSceneDepth(InputParams);
//
// Then sample scene color or depth:
//		SampleCachedSceneColorTexture(InputParams, /* Offset = */ int2(-1, -1));
//		SampleCachedSceneDepthTexture(InputParams, /* Offset = */ int2(-1, -1));
//
// The offset parameter is meant to be a compile time constant of the pixel offset from nearest input sample.
#if AA_SAMPLE_CACHE_METHOD == AA_SAMPLE_CACHE_METHOD_VGPR_3X3
//------------------------------------------------------- 3x3 NEIGHBORS CACHING

#define AA_PRECACHE_SCENE_COLOR 1

// Fills FTAAInputParameters::CachedNeighbors0 (and CachedNeighbors1 when the history uses
// two render targets) with the 3x3 neighborhood around the nearest input pixel.
// Texel coordinates are clamped to [InputMinPixelCoord, InputMaxPixelCoord].
void PrecacheInputSceneColor(inout FTAAInputParameters InputParams)
{
	// Texel coordinate of the nearest input pixel; identical for all nine neighbors.
	const int2 CenterCoord = int2(InputParams.NearestBufferUV * InputSceneColorSize.xy);

	UNROLL
	for (uint NeighborIndex = 0; NeighborIndex < kNeighborsCount; NeighborIndex++)
	{
		int2 PixelCoord = clamp(CenterCoord + kOffsets3x3[NeighborIndex], InputMinPixelCoord, InputMaxPixelCoord);

		InputParams.CachedNeighbors0[NeighborIndex] = TransformCurrentFrameSceneColor(InputSceneColor[PixelCoord]);
		#if HISTORY_RENDER_TARGETS == 2
			InputParams.CachedNeighbors1[NeighborIndex] = InputSceneMetadata[PixelCoord].r;
		#endif
	}
}

// Reads one neighbor back from the VGPR cache filled by PrecacheInputSceneColor().
// PixelOffset is expected to be a compile time constant, so the slot computation folds away.
FTAASceneColorSample SampleCachedSceneColorTexture(
	inout FTAAInputParameters InputParams,
	int2 PixelOffset)
{
	// Slot 4 is the center of the 3x3 grid, rows are 3 entries wide.
	const uint Slot = uint(4 + PixelOffset.x + PixelOffset.y * 3);

	FTAASceneColorSample Result;
	Result.Color = InputParams.CachedNeighbors0[Slot];

	#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC
		// CoC radius travels in the alpha channel.
		Result.CocRadius = Result.Color.a;
	#elif AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
		// CoC radius comes from the separately cached metadata.
		Result.CocRadius = InputParams.CachedNeighbors1[Slot];
	#else
		Result.CocRadius = 0;
	#endif

	Result.HdrWeight = GetSceneColorHdrWeight(InputParams, Result.Color);
	return Result;
}


#elif AA_SAMPLE_CACHE_METHOD == AA_SAMPLE_CACHE_METHOD_LDS

//-------------------------------- Generic LDS functions.

// Returns the linear index of this thread within its group, rebuilt from GroupThreadId.
uint GetGroupIndex(in FTAAInputParameters InputParams)
{
	if (0)
	{
		// Group thread index doesn't actually suppress ALU instruction on consoles.
		return InputParams.GroupThreadIndex;
	}

	const uint2 ThreadId = InputParams.GroupThreadId;
	return ThreadId.x + ThreadId.y * THREADGROUP_SIZEX;
}


//------------------------------------------------------- COMPUTE SHADER GROUP SHARE OPTIMIZATION

#if THREADGROUP_SIZEX != THREADGROUP_SIZEY
	#error AA_SAMPLE_CACHE_METHOD >= 2 assume square shaped tiles.
#endif

#ifndef TAA_SCREEN_PERCENTAGE_RANGE
	#error LDS cache needs to know the screen percentage range.
#endif

#define AA_PRECACHE_IMPLEMENTATIONS 1

// Number of threads in one group.
#define THREADGROUP_TOTAL (THREADGROUP_SIZEX * THREADGROUP_SIZEY)

// Base width of the LDS tile, chosen from the screen percentage range.
// Below 75% screen percentage a base width of 6 is enough; with its border the scene color
// tile is then 8 texels wide, which each lane can fill in a single iteration of
// PrecacheInputSceneColorToLDS(), and reduces size(LDS) = 1k.
#if TAA_SCREEN_PERCENTAGE_RANGE == 1 // screen percentage < 75%
	#define LDS_BASE_TILE_WIDTH 6
#elif TAA_SCREEN_PERCENTAGE_RANGE == 0 // screen percentage in [75%; 100%]
	#define LDS_BASE_TILE_WIDTH THREADGROUP_SIZEX
#elif TAA_SCREEN_PERCENTAGE_RANGE == 2 // screen percentage > 100%
	#error Should not use LDS caching.
#else
	#error Unknown screen percentage range.
#endif


//-------------------------------- Configuration.

// What gets prefetched, and how.

// Scene depth: 1 = use Load; 2 = use gather4.
#if !AA_UPSAMPLE
	// Scene depth caching is left off for TAA upsample: the extra screen percentage ALU makes things worse.
	#define AA_PRECACHE_SCENE_DEPTH 2
#endif

// Scene color: 1 = use load.
#define AA_PRECACHE_SCENE_COLOR 1

// Store GetSceneColorHdrWeight() in the otherwise unused alpha slot of the cached scene color.
#define AA_PRECACHE_SCENE_HDR_WEIGHT (AA_TONE && HISTORY_PAYLOAD_COMPONENTS == 3)

// Layout of scene color in LDS.
// 0: AoS
// 1: SoA
// 2: AoSoA (SoA scene color, AoS GetSceneColorHdrWeight())
#define LDS_COLOR_LAYOUT 0


//-------------------------------- Depth tile constants.
// Border, in texels, added around the group tile for depth.
#define LDS_DEPTH_TILE_BORDER_SIZE (AA_CROSS)

// Width in texels of the depth tile cached into LDS.
#define LDS_DEPTH_TILE_WIDTH (LDS_BASE_TILE_WIDTH + 2 * LDS_DEPTH_TILE_BORDER_SIZE)

// Total number of texels cached in the depth tile.
#define LDS_DEPTH_ARRAY_SIZE (LDS_DEPTH_TILE_WIDTH * LDS_DEPTH_TILE_WIDTH)


//-------------------------------- Scene color tile constants.
// TODO: shader permutation for screen percentage <= 75% with AA_UPSAMPLE to fit in 1k LDS.

// Number of scene color components that get cached.
#if HISTORY_PAYLOAD_COMPONENTS == 4 || AA_PRECACHE_SCENE_HDR_WEIGHT
	#define LDS_COLOR_COMPONENT_COUNT 4
#else
	#define LDS_COLOR_COMPONENT_COUNT 3
#endif

// Border, in texels, added around the group tile for scene color.
#define LDS_COLOR_TILE_BORDER_SIZE (1)

// Width in texels of the scene color tile cached into LDS.
#define LDS_COLOR_TILE_WIDTH (LDS_BASE_TILE_WIDTH + 2 * LDS_COLOR_TILE_BORDER_SIZE)

// Total number of texels cached in the scene color tile.
#define LDS_COLOR_ARRAY_SIZE (LDS_COLOR_TILE_WIDTH * LDS_COLOR_TILE_WIDTH)


//-------------------------------- Group shared global.

// Size of the LDS to be allocated.
#define LDS_ARRAY_SIZE (LDS_COLOR_ARRAY_SIZE * LDS_COLOR_COMPONENT_COUNT)

// The same array is used for the depth tile, so it must be at least that large.
#if LDS_ARRAY_SIZE < LDS_DEPTH_ARRAY_SIZE
	#error LDS_ARRAY_SIZE assumed scene color caching is bigger than scene depth caching.
#endif

// Some compilers may have issues optimising LDS store instructions, so a float4 array is used as a hint
// whenever depth is not cached and the color layout is 4-component AoS.
#if defined(AA_PRECACHE_SCENE_DEPTH)
	#define LDS_USE_FLOAT4_ARRAY 0
#else
	#define LDS_USE_FLOAT4_ARRAY (LDS_COLOR_COMPONENT_COUNT == 4 && LDS_COLOR_LAYOUT == 0)
#endif

#if LDS_USE_FLOAT4_ARRAY
	groupshared float4 GroupSharedArrayF4[LDS_ARRAY_SIZE/4];
#else
	groupshared float GroupSharedArray[LDS_ARRAY_SIZE];
#endif


//-------------------------------- Generic LDS tile functions.

#if AA_UPSAMPLE

// Returns the pixel coordinate (texel center) of the nearest input pixel K for thread 0 of the group.
float2 GetGroupThread0InputPixelCoord(in FTAAInputParameters InputParams)
{
	// Center of thread 0's output pixel, relative to the top left corner of the viewport.
	float2 OutputPixelCenter = InputParams.GroupId * uint2(THREADGROUP_SIZEX, THREADGROUP_SIZEY) + 0.5;

	// The same position as an output viewport UV.
	float2 OutputViewportUV = OutputPixelCenter * OutputViewportSize.zw;

	// Center of output pixel O expressed in input viewport pixels, jitter included.
	float2 InputPixelPosO = OutputViewportUV * InputViewSize.xy + TemporalJitterPixels;

	// Snap to the center of the nearest input pixel K.
	float2 InputPixelPosK = floor(InputPixelPosO) + 0.5;

	return InputViewMin.xy + InputPixelPosK;
}

#endif

// Get the texel offset of a LDS tile's top left corner.
uint2 GetGroupTileTexelOffset(in FTAAInputParameters InputParams, uint TileBorderSize)
{
	#if AA_UPSAMPLE
	{
		// Start from the nearest input pixel K of thread 0 and step back by the border.
		float2 GroupOriginPPCk = GetGroupThread0InputPixelCoord(InputParams);
		return uint2(floor(GroupOriginPPCk) - TileBorderSize);
	}
	#else // !AA_UPSAMPLE
	{
		// Input and output pixels coincide: offset the group's first pixel by the viewport origin and the border.
		uint2 GroupOrigin = InputParams.GroupId * uint2(THREADGROUP_SIZEX, THREADGROUP_SIZEY);
		return OutputViewportRect.xy + GroupOrigin - TileBorderSize;
	}
	#endif
}

// Get the index within the LDS array.
// InputParams     Per-pixel parameters; only the thread id and NearestBufferUV are read.
// PixelOffset     Texel offset from this thread's nearest input pixel.
// TileBorderSize  Border of the tile being addressed (depth and color tiles use different borders).
// The row stride is TileBorderSize * 2 + LDS_BASE_TILE_WIDTH in both branches.
uint GetTileArrayIndexFromPixelOffset(in FTAAInputParameters InputParams, int2 PixelOffset, uint TileBorderSize)
{
	#if AA_UPSAMPLE
	{
		// (1, row stride): dotting a 2D tile position with this linearizes it.
		const float2 RowMultiplier = float2(1, TileBorderSize * 2 + LDS_BASE_TILE_WIDTH);

		float2 Thread0PPCk = GetGroupThread0InputPixelCoord(InputParams);
		float2 PPCk = InputParams.NearestBufferUV * InputSceneColorSize.xy;
		// Position of this thread's nearest input pixel relative to thread 0's.
		float2 TilePos = floor(PPCk) - floor(Thread0PPCk);
		return uint(dot(TilePos, RowMultiplier) + dot(float2(PixelOffset) + float(TileBorderSize), RowMultiplier));
	}
	#else
	{
		// Without upsampling the position in the tile is simply the thread id plus border and offset.
		uint2 TilePos = InputParams.GroupThreadId + uint2(PixelOffset + TileBorderSize);
		return TilePos.x + TilePos.y * (TileBorderSize * 2 + LDS_BASE_TILE_WIDTH);
	}
	#endif
}


//-------------------------------- Share depth texture fetches.

#if defined(AA_PRECACHE_SCENE_DEPTH)

// Precache input scene depth into LDS.
// Each thread writes LoadCount entries (mode 1) or LoadCount 2x2 quads (mode 2) of the depth
// tile into GroupSharedArray, stepping its linear id by THREADGROUP_TOTAL between iterations.
void PrecacheInputSceneDepthToLDS(in FTAAInputParameters InputParams)
{
	uint2 GroupTexelOffset = GetGroupTileTexelOffset(InputParams, LDS_DEPTH_TILE_BORDER_SIZE);

	#if AA_PRECACHE_SCENE_DEPTH == 1 // Prefetch depth buffer using Load.
	{
		// Iterations needed for the whole group to cover the tile, rounded up.
		const uint LoadCount = (LDS_DEPTH_ARRAY_SIZE + THREADGROUP_TOTAL - 1) / THREADGROUP_TOTAL;

		uint LinearGroupThreadId = GetGroupIndex(InputParams);

		UNROLL
		for (uint i = 0; i < LoadCount; i++)
		{
			uint2 TexelLocation = GroupTexelOffset + uint2(
				LinearGroupThreadId % LDS_DEPTH_TILE_WIDTH,
				LinearGroupThreadId / LDS_DEPTH_TILE_WIDTH);

			// The bounds test can only fail on the last iteration, and only when the tile size is
			// not a multiple of the group size; the other two terms are compile time constants
			// once the loop is unrolled.
			if ((LinearGroupThreadId < LDS_DEPTH_ARRAY_SIZE) ||
				(i != LoadCount - 1) ||
				(LDS_DEPTH_ARRAY_SIZE % THREADGROUP_TOTAL) == 0)
			{
				GroupSharedArray[LinearGroupThreadId] = SceneDepthTexture.Load(uint3(TexelLocation, 0)).x;
			}
			LinearGroupThreadId += THREADGROUP_TOTAL;
		}
	}
	#elif AA_PRECACHE_SCENE_DEPTH == 2 // Prefetch depth buffer using Gather.
	{
		// Each gather fills a 2x2 quad, so only a quarter as many fetches are needed.
		const uint LoadCount = (LDS_DEPTH_ARRAY_SIZE / 4 + THREADGROUP_TOTAL - 1) / THREADGROUP_TOTAL;

		uint LinearGroupThreadId = GetGroupIndex(InputParams);

		UNROLL
		for (uint i = 0; i < LoadCount; i++)
		{
			// Top left texel of this thread's 2x2 quad inside the tile.
			uint2 TileDest = uint2(
				(2 * LinearGroupThreadId) % LDS_DEPTH_TILE_WIDTH,
				2 * ((2 * LinearGroupThreadId) / LDS_DEPTH_TILE_WIDTH));

			uint2 TexelLocation = GroupTexelOffset + TileDest;

			uint DestI = TileDest.x + TileDest.y * LDS_DEPTH_TILE_WIDTH;

			// Same last-iteration bounds guard as the Load path, in units of quads.
			if ((DestI < LDS_DEPTH_ARRAY_SIZE) ||
				(i != LoadCount - 1) ||
				((LDS_DEPTH_ARRAY_SIZE / 4) % THREADGROUP_TOTAL) == 0)
			{
				float2 UV = float2(TexelLocation + 0.5) * InputSceneColorSize.zw;
				float4 Depth = SceneDepthTexture.Gather(SceneDepthTextureSampler, UV);
				// Scatter the gathered components: x/y go to the second row (left, right),
				// w/z to the first row (left, right).
				GroupSharedArray[DestI + 1 * LDS_DEPTH_TILE_WIDTH + 0] = Depth.x;
				GroupSharedArray[DestI + 1 * LDS_DEPTH_TILE_WIDTH + 1] = Depth.y;
				GroupSharedArray[DestI + 0 * LDS_DEPTH_TILE_WIDTH + 1] = Depth.z;
				GroupSharedArray[DestI + 0 * LDS_DEPTH_TILE_WIDTH + 0] = Depth.w;
			}
			LinearGroupThreadId += THREADGROUP_TOTAL;
		}
	}
	#else
		#error Wrong AA_PRECACHE_SCENE_DEPTH
	#endif
}

// Reads the cached depth at PixelOffset from this thread's nearest input pixel.
// NOTE(review): no barrier is issued here; presumably the caller synchronizes the group between
// PrecacheInputSceneDepthToLDS() and this read -- confirm at the call site.
float SampleCachedSceneDepthTexture(in FTAAInputParameters InputParams, int2 PixelOffset)
{
	return GroupSharedArray[GetTileArrayIndexFromPixelOffset(InputParams, PixelOffset, LDS_DEPTH_TILE_BORDER_SIZE)];
}

#endif // defined(AA_PRECACHE_SCENE_DEPTH)


//-------------------------------- Share color texture fetches.

#if defined(AA_PRECACHE_SCENE_COLOR)

// Return the index into GroupSharedArray for a given texel ArrayIndex and ComponentId.
uint GetSceneColorLDSIndex(uint ArrayIndex, uint ComponentId)
{
	#if LDS_COLOR_LAYOUT == 0 // AoS
		// Components of one texel are contiguous.
		return ArrayIndex * LDS_COLOR_COMPONENT_COUNT + ComponentId;
	#elif LDS_COLOR_LAYOUT == 1 // SoA
		// Each component occupies its own plane of LDS_COLOR_ARRAY_SIZE entries.
		return ArrayIndex + ComponentId * LDS_COLOR_ARRAY_SIZE;
	#else
		#error Unknown color layout.
	#endif
}

// Precache input scene color into LDS.
void PrecacheInputSceneColorToLDS(in FTAAInputParameters InputParams)
{
	// Number of passes required for the whole group to cover the color tile.
	const uint PassCount = (LDS_COLOR_ARRAY_SIZE + THREADGROUP_TOTAL - 1) / THREADGROUP_TOTAL;

	// When upsampling, the tile origin is a floating point pixel coordinate.
	#define LDS_FLOAT_UV AA_UPSAMPLE

	#if LDS_FLOAT_UV
		float SlotIndex = float(GetGroupIndex(InputParams));
		float2 TileOriginTexel = GetGroupThread0InputPixelCoord(InputParams) - LDS_COLOR_TILE_BORDER_SIZE;
	#else
		uint SlotIndex = GetGroupIndex(InputParams);
		uint2 TileOriginTexel = GetGroupTileTexelOffset(InputParams, LDS_COLOR_TILE_BORDER_SIZE);
	#endif

	UNROLL
	for (uint PassIndex = 0; PassIndex < PassCount; PassIndex++)
	{
		// Turn the linear slot into a 2D texel location of the input scene color.
		#if LDS_FLOAT_UV
			float Y = floor(SlotIndex * (1.0 / LDS_COLOR_TILE_WIDTH));
			float X = SlotIndex - LDS_COLOR_TILE_WIDTH * Y;
			float2 TexelLocation = TileOriginTexel + float2(X, Y);
		#else
			uint2 TexelLocation = TileOriginTexel + uint2(
				SlotIndex % LDS_COLOR_TILE_WIDTH,
				SlotIndex / LDS_COLOR_TILE_WIDTH);
		#endif

		// Only the last pass can overshoot the array, and only when the array size is
		// not a multiple of the group size; the two last terms are compile time constants.
		if ((SlotIndex < LDS_COLOR_ARRAY_SIZE) ||
			(PassIndex != PassCount - 1) ||
			(LDS_COLOR_ARRAY_SIZE % THREADGROUP_TOTAL) == 0)
		{
			// Fetch the raw color, with the coordinate clamped to the valid input pixel range.
			#if LDS_FLOAT_UV
				int2 Coord = TexelLocation;
				Coord = clamp(Coord, InputMinPixelCoord, InputMaxPixelCoord);

				float4 RawColor = InputSceneColor[Coord];
			#else
				int2 Coord = int2(TexelLocation);
				Coord = clamp(Coord, InputMinPixelCoord, InputMaxPixelCoord);

				float4 RawColor = InputSceneColor.Load(uint3(Coord, 0));
			#endif

			float4 Color = TransformCurrentFrameSceneColor(RawColor);

			// Store the HDR weight in alpha so that the inner loops do not have to recompute it.
			#if AA_PRECACHE_SCENE_HDR_WEIGHT
				Color.a = GetSceneColorHdrWeight(InputParams, Color);
			#endif

			const uint StoreIndex = uint(SlotIndex);

			#if LDS_USE_FLOAT4_ARRAY
				GroupSharedArrayF4[StoreIndex] = Color;
			#else
				GroupSharedArray[GetSceneColorLDSIndex(StoreIndex, 0)] = Color.r;
				GroupSharedArray[GetSceneColorLDSIndex(StoreIndex, 1)] = Color.g;
				GroupSharedArray[GetSceneColorLDSIndex(StoreIndex, 2)] = Color.b;

				#if LDS_COLOR_COMPONENT_COUNT == 4
					GroupSharedArray[GetSceneColorLDSIndex(StoreIndex, 3)] = Color.a;
				#endif
			#endif
		}

		SlotIndex += THREADGROUP_TOTAL;
	}
}

/**
 * Reads the scene color sample at PixelOffset from the tile written by PrecacheInputSceneColorToLDS().
 *
 * The cached color has already been through TransformCurrentFrameSceneColor(), so it is not
 * transformed again here. CocRadius is always 0 on this path (CoC payloads are rejected below).
 */
FTAASceneColorSample SampleCachedSceneColorTexture(
	in FTAAInputParameters InputParams,
	int2 PixelOffset)
{
	#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC || AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
		#error Unsupported history payload with LDS scene color caching.Test
	#endif

	uint TileIndex = GetTileArrayIndexFromPixelOffset(InputParams, PixelOffset, LDS_COLOR_TILE_BORDER_SIZE);

	FTAASceneColorSample Sample;
	Sample.CocRadius = 0;

	#if LDS_USE_FLOAT4_ARRAY
	{
		Sample.Color = GroupSharedArrayF4[TileIndex];

		#if AA_PRECACHE_SCENE_HDR_WEIGHT
			// Alpha carries the precached HDR weight instead of a payload component.
			Sample.HdrWeight = Sample.Color.a;
			Sample.Color.a = 0;
		#elif HISTORY_PAYLOAD_COMPONENTS != 4
			#error LDS_USE_FLOAT4_ARRAY assumes 4 components.
		#endif
	}
	#else
	{
		Sample.Color.r = GroupSharedArray[GetSceneColorLDSIndex(TileIndex, 0)];
		Sample.Color.g = GroupSharedArray[GetSceneColorLDSIndex(TileIndex, 1)];
		Sample.Color.b = GroupSharedArray[GetSceneColorLDSIndex(TileIndex, 2)];
		Sample.Color.a = 0;

		// The fourth cached component is either the payload's alpha, or the precached HDR weight.
		#if HISTORY_PAYLOAD_COMPONENTS == 4
			Sample.Color.a = GroupSharedArray[GetSceneColorLDSIndex(TileIndex, 3)];
		#elif AA_PRECACHE_SCENE_HDR_WEIGHT
			Sample.HdrWeight = GroupSharedArray[GetSceneColorLDSIndex(TileIndex, 3)];
		#endif
	}
	#endif

	// The HDR weight has to be computed now when it was not precached in LDS.
	#if !AA_PRECACHE_SCENE_HDR_WEIGHT
		Sample.HdrWeight = GetSceneColorHdrWeight(InputParams, Sample.Color);
	#endif

	return Sample;
}

#endif // defined(AA_PRECACHE_SCENE_COLOR)


/** Precaches scene depth into LDS (when enabled) and waits for the whole group to be done. */
void PrecacheInputSceneDepth(in FTAAInputParameters InputParams)
{
	#if defined(AA_PRECACHE_SCENE_DEPTH)
		PrecacheInputSceneDepthToLDS(InputParams);
		GroupMemoryBarrierWithGroupSync();
	#endif
}

/** Precaches scene color into LDS (when enabled) and waits for the whole group to be done. */
void PrecacheInputSceneColor(in FTAAInputParameters InputParams)
{
	// Depth and color precaching both write GroupSharedArray: sync the group before the color
	// tile gets written when both are enabled (presumably so that no thread is still reading depth).
	#if defined(AA_PRECACHE_SCENE_DEPTH) && defined(AA_PRECACHE_SCENE_COLOR)
		GroupMemoryBarrierWithGroupSync();
	#endif

	#if defined(AA_PRECACHE_SCENE_COLOR)
		PrecacheInputSceneColorToLDS(InputParams);
		GroupMemoryBarrierWithGroupSync();
	#endif
}

#endif // AA_SAMPLE_CACHE_METHOD == AA_SAMPLE_CACHE_METHOD_LDS


//------------------------------------------------------- FALLBACK TO NO CACHING IMPLEMENTATIONS.

#if !defined(AA_PRECACHE_SCENE_DEPTH)

#if !defined(AA_PRECACHE_IMPLEMENTATIONS)
// No scene depth precaching: intentionally does nothing.
void PrecacheInputSceneDepth(in FTAAInputParameters InputParams)
{ }
#endif

// Sample scene depth directly from the depth texture, at PixelOffset texels from the nearest input pixel.
float SampleCachedSceneDepthTexture(in FTAAInputParameters InputParams, int2 PixelOffset)
{
	return SceneDepthTexture.SampleLevel(SceneDepthTextureSampler, InputParams.NearestBufferUV, 0, PixelOffset).r;
}

#endif

#if !defined(AA_PRECACHE_SCENE_COLOR)

#if !defined(AA_PRECACHE_IMPLEMENTATIONS)
// No scene color precaching: intentionally does nothing.
void PrecacheInputSceneColor(in FTAAInputParameters InputParams)
{ }
#endif

// Sample and transform scene color.
// Non cached path: reads the input scene color directly from the texture, with the pixel
// coordinate clamped to [InputMinPixelCoord, InputMaxPixelCoord].
FTAASceneColorSample SampleCachedSceneColorTexture(
	in FTAAInputParameters InputParams,
	int2 PixelOffset)
{
	FTAASceneColorSample Sample;

	int2 Coord = int2(InputParams.NearestBufferUV * InputSceneColorSize.xy) + PixelOffset;
	Coord = clamp(Coord, InputMinPixelCoord, InputMaxPixelCoord);

	Sample.Color = TransformCurrentFrameSceneColor(InputSceneColor[Coord]);

	// The circle of confusion radius lives either in scene color's alpha, or in the metadata texture.
	Sample.CocRadius = 0;
	#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC
		Sample.CocRadius = Sample.Color.a;
	#elif AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
		Sample.CocRadius = InputSceneMetadata[Coord].r;
	#endif

	Sample.HdrWeight = GetSceneColorHdrWeight(InputParams, Sample.Color);
	return Sample;
}

#endif


//------------------------------------------------------- TEMPORAL UPSAMPLING

#if AA_UPSAMPLE

/**
 * Returns the spatial weight of an input sample located at PixelDelta (in input pixels)
 * from the highest point of the filter kernel.
 *
 * @param IntermediaryResult  Provides InvFilterScaleFactor, used to widen the kernel when upscaling.
 * @param PixelDelta          Vector in input pixels between the output pixel and the input sample.
 */
float ComputeSampleWeigth(in FTAAIntermediaryResult IntermediaryResult, float2 PixelDelta)
{
	float u2 = UpscaleFactor * UpscaleFactor;

	// The point of InvFilterScaleFactor is to blur current frame scene color when upscaling.
	// Therefore there is no need to do it when downscaling.
	if (!AA_DOWNSAMPLE)
	{
		u2 *= (IntermediaryResult.InvFilterScaleFactor * IntermediaryResult.InvFilterScaleFactor);
	}

	#if 1
		// Polynomial kernel in x^2: 1 - 1.9 * x^2 + 0.905 * x^4, with x^2 saturated to [0, 1].
		float x2 = saturate(u2 * dot(PixelDelta, PixelDelta));
		return (0.905 * x2 - 1.9) * x2 + 1;
	#else
		// original e ^ (- x^2 / (2 * s^2))
		const float Sigma = 0.47;
		const float ExponentInputFactor = (-0.5 / (Sigma * Sigma));

		float x2 = dot(PixelDelta, PixelDelta) * u2;
		return exp(ExponentInputFactor * x2);
	#endif
}

/**
 * Returns the temporal weight of the output pixel, from the vector PixelDelta (in input pixels)
 * between the nearest input pixel and the output pixel.
 *
 * Uses the same kernel as ComputeSampleWeigth(), additionally multiplied by the squared
 * kernel scale when upsampling.
 */
float ComputePixelWeigth(in FTAAIntermediaryResult IntermediaryResult, float2 PixelDelta)
{
	float u2 = UpscaleFactor * UpscaleFactor;

	// The point of InvFilterScaleFactor is to blur current frame scene color when upscaling.
	// Therefore there is no need to do it when downscaling.
	if (!AA_DOWNSAMPLE)
	{
		u2 *= (IntermediaryResult.InvFilterScaleFactor * IntermediaryResult.InvFilterScaleFactor);
	}

	#if 1
		// Polynomial kernel in x^2: 1 - 1.9 * x^2 + 0.905 * x^4, with x^2 saturated to [0, 1].
		float x2 = saturate(u2 * dot(PixelDelta, PixelDelta));
		float r = (0.905 * x2 - 1.9) * x2 + 1;
	#else
		// original e ^ (- x^2 / (2 * s^2))
		const float Sigma = 0.47;
		const float ExponentInputFactor = (-0.5 / (Sigma * Sigma));

		// Scaled by u2 like the polynomial kernel above and like ComputeSampleWeigth().
		float x2 = dot(PixelDelta, PixelDelta) * u2;
		float r = exp(ExponentInputFactor * x2);
	#endif

	if (!AA_DOWNSAMPLE)
	{
		// Multiply pixel weight ^ 2 by upscale factor because have only a probability = screen percentage ^ 2 to return 1.
		// Only do it for upsampling to not converge slower than if doing screen percentage 100%.
		return u2 * r;
	}
	return r;
}

#endif // AA_UPSAMPLE


//------------------------------------------------------- TAA MAJOR FUNCTIONS

/**
 * Filters the current frame's input samples around the nearest input pixel.
 *
 * Writes IntermediaryResult.Filtered, and IntermediaryResult.FilteredTemporalWeight when upsampling.
 * With AA_FILTERED disabled, the center sample is returned unfiltered.
 */
void FilterCurrentFrameInputSamples(
	in FTAAInputParameters InputParams,
	inout FTAAIntermediaryResult IntermediaryResult)
{
	#if !AA_FILTERED
	{
		// Single fetch of the center sample for both payload members.
		FTAASceneColorSample CenterSample = SampleCachedSceneColorTexture(InputParams, int2(0, 0));
		IntermediaryResult.Filtered.Color = CenterSample.Color;
		IntermediaryResult.Filtered.CocRadius = CenterSample.CocRadius;
		return;
	}
	#endif

	FTAAHistoryPayload Filtered;

	{
		#if AA_UPSAMPLE
			// Pixel coordinate of the center of output pixel O in the input viewport.
			float2 PPCo = InputParams.ViewportUV * InputViewSize.xy + TemporalJitterPixels;

			// Pixel coordinate of the center of the nearest input pixel K.
			float2 PPCk = floor(PPCo) + 0.5;

			// Vector in pixel between pixel K -> O.
			float2 dKO = PPCo - PPCk;
		#endif

		// With 6 samples, the first 5 use the plus pattern; the 6th is selected from dKO in the loop.
		#if AA_SAMPLES == 9
			const uint SampleIndexes[9] = kSquareIndexes3x3;
		#elif AA_SAMPLES == 5 || AA_SAMPLES == 6
			const uint SampleIndexes[5] = kPlusIndexes3x3;
		#endif

		#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC || AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
			// Fetches center pixel's Coc for the bilateral filtering.
			float CenterCocRadius = SampleCachedSceneColorTexture(InputParams, int2(0, 0)).CocRadius;
		#endif

		float NeighborsHdrWeight = 0;
		float NeighborsFinalWeight = 0;
		float4 NeighborsColor = 0;

		UNROLL
		for (uint i = 0; i < AA_SAMPLES; i++)
		{
			// Get the sample offset from the nearest input pixel.
			int2 SampleOffset;

			#if AA_UPSAMPLE && AA_SAMPLES == 6
				if (i == 5)
				{
					SampleOffset = SignFastInt(dKO);
				}
				else
			#endif
			{
				const uint SampleIndex = SampleIndexes[i];
				SampleOffset = kOffsets3x3[SampleIndex];
			}
			float2 fSampleOffset = float2(SampleOffset);

			// When doing Coc bilateral, the center sample is accumulated last.
			#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC && 0
				if (all(SampleOffset == 0) && (AA_SAMPLES != 6 || i != 5))
				{
					continue;
				}
			#endif

			// Finds out the spatial weight of this input sample.
			#if AA_UPSAMPLE
				// Compute the pixel delta between output pixels and input pixel I.
				// Note: abs() is unecessary because of the dot(dPP, dPP) latter on.
				float2 dPP = fSampleOffset - dKO;

				float SampleSpatialWeight = ComputeSampleWeigth(IntermediaryResult, dPP);

			#elif AA_SAMPLES == 9
				float SampleSpatialWeight = GET_SCALAR_ARRAY_ELEMENT(SampleWeights, i);

			#elif AA_SAMPLES == 5
				float SampleSpatialWeight = GET_SCALAR_ARRAY_ELEMENT(PlusWeights, i);

			#else
				#error Do not know how to compute filtering sample weight.
			#endif

			// Fetch sample.
			FTAASceneColorSample Sample = SampleCachedSceneColorTexture(InputParams, SampleOffset);

			// Finds out the sample's HDR weight.
			#if AA_TONE
				float SampleHdrWeight = Sample.HdrWeight;
			#else
				float SampleHdrWeight = 1;
			#endif

			// Finds out the sample's bilateral weight according to the payload.
			#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC
				float BilateralWeight = ComputeNeightborSampleBilateralWeight(CenterCocRadius, Sample.CocRadius);
			#else
				float BilateralWeight = 1;
			#endif

			float SampleFinalWeight = SampleSpatialWeight * SampleHdrWeight * BilateralWeight;

			// Apply pixel.
			NeighborsColor += SampleFinalWeight * Sample.Color;
			NeighborsFinalWeight += SampleFinalWeight;

			NeighborsHdrWeight += SampleSpatialWeight * SampleHdrWeight;
		}

		#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC || AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
		{
			// NOTE(review): Sample and SampleWeight only feed the commented out formula below.
			FTAASceneColorSample Sample = SampleCachedSceneColorTexture(InputParams, int2(0, 0));

			#if AA_UPSAMPLE
				float SampleWeight = ComputeSampleWeigth(IntermediaryResult, -dKO);
			#elif AA_SAMPLES == 9
				float SampleWeight = GET_SCALAR_ARRAY_ELEMENT(SampleWeights, 4);
			#else
				float SampleWeight = GET_SCALAR_ARRAY_ELEMENT(PlusWeights, 2);
			#endif

			if (AA_TONE)
			{
				SampleWeight *= Sample.HdrWeight;
			}

			// TODO: it feels wrong...
			//Filtered = NeighborsColor * (NeighborsHdrWeight * rcp(NeighborsFinalWeight)) + Sample.Color * SampleWeight;
			Filtered.Color = NeighborsColor * rcp(NeighborsFinalWeight);
			Filtered.CocRadius = CenterCocRadius;
		}
		#elif AA_TONE || AA_UPSAMPLE
		{
			// Reweight because SampleFinalWeight does not that have total sum = 1.
			Filtered.Color = NeighborsColor * rcp(NeighborsFinalWeight);
			Filtered.CocRadius = 0;
		}
		#else
		{
			Filtered.Color = NeighborsColor;
			Filtered.CocRadius = 0;
		}
		#endif

		#if AA_UPSAMPLE
			// Compute the temporal weight of the output pixel.
			IntermediaryResult.FilteredTemporalWeight = ComputePixelWeigth(IntermediaryResult, dKO);
		#endif
	}

	IntermediaryResult.Filtered = Filtered;
}


/**
 * Computes the neighborhood bounding box used to reject history.
 *
 * @param InputParams         Per pixel input parameters.
 * @param IntermediaryResult  Filtered is read by the variance clamping box.
 * @param OutNeighborMin      Lower bound of the box.
 * @param OutNeighborMax      Upper bound of the box.
 */
void ComputeNeighborhoodBoundingbox(
	in FTAAInputParameters InputParams,
	in FTAAIntermediaryResult IntermediaryResult,
	out FTAAHistoryPayload OutNeighborMin,
	out FTAAHistoryPayload OutNeighborMax)
{
	// TODO: clean this up.
	FTAAHistoryPayload Neighbors[kNeighborsCount];

	UNROLL
	for (uint i = 0; i < kNeighborsCount; i++)
	{
		// Single fetch per neighbor for both payload members.
		FTAASceneColorSample NeighborSample = SampleCachedSceneColorTexture(InputParams, kOffsets3x3[i]);
		Neighbors[i].Color = NeighborSample.Color;
		Neighbors[i].CocRadius = NeighborSample.CocRadius;
	}

	FTAAHistoryPayload NeighborMin;
	FTAAHistoryPayload NeighborMax;

	#if AA_HISTORY_CLAMPING_BOX == HISTORY_CLAMPING_BOX_VARIANCE
	{
		#if AA_SAMPLES == 9
			const uint SampleIndexes[9] = kSquareIndexes3x3;
		#elif AA_SAMPLES == 5
			const uint SampleIndexes[5] = kPlusIndexes3x3;
		#else
			#error Unknown number of samples.
		#endif

		// First and second moments of the neighborhood's color.
		float4 m1 = 0;
		float4 m2 = 0;

		// The payload is a struct, so the variance box is applied to Color only. CocRadius uses a
		// plain min / max of the same samples, that also contains the filtered payload.
		float MinCocRadius = IntermediaryResult.Filtered.CocRadius;
		float MaxCocRadius = IntermediaryResult.Filtered.CocRadius;

		for (uint i = 0; i < AA_SAMPLES; i++)
		{
			FTAAHistoryPayload SamplePayload = Neighbors[SampleIndexes[i]];

			m1 += SamplePayload.Color;
			m2 += Pow2(SamplePayload.Color);

			MinCocRadius = min(MinCocRadius, SamplePayload.CocRadius);
			MaxCocRadius = max(MaxCocRadius, SamplePayload.CocRadius);
		}

		m1 *= (1.0 / AA_SAMPLES);
		m2 *= (1.0 / AA_SAMPLES);

		// abs() guards against slightly negative variances due to floating point precision.
		float4 StdDev = sqrt(abs(m2 - m1 * m1));

		// Box of +- 1.25 standard deviation around the mean, grown to contain the filtered color.
		NeighborMin.Color = min(m1 - 1.25 * StdDev, IntermediaryResult.Filtered.Color);
		NeighborMax.Color = max(m1 + 1.25 * StdDev, IntermediaryResult.Filtered.Color);

		NeighborMin.CocRadius = MinCocRadius;
		NeighborMax.CocRadius = MaxCocRadius;
	}
	#elif AA_HISTORY_CLAMPING_BOX == HISTORY_CLAMPING_BOX_SAMPLE_DISTANCE
	// Do color clamping only within a radius.
	{
		float2 PPCo = InputParams.ViewportUV * InputViewSize.xy + TemporalJitterPixels;
		float2 PPCk = floor(PPCo) + 0.5;
		float2 dKO = PPCo - PPCk;

		// Sample 4 is is always going to be considered anyway.
		NeighborMin = Neighbors[4];
		NeighborMax = Neighbors[4];

		// Reduce distance threshold as upsacale factor increase to reduce ghosting.
		float DistthresholdLerp = UpscaleFactor - 1;
		float DistThreshold = lerp(1.51, 1.3, DistthresholdLerp);

		#if AA_SAMPLES == 9
			const uint Indexes[9] = kSquareIndexes3x3;
		#else
			const uint Indexes[5] = kPlusIndexes3x3;
		#endif

		UNROLL
		for (uint i = 0; i < AA_SAMPLES; i++)
		{
			uint NeightborId = Indexes[i];
			if (NeightborId != 4)
			{
				// Vector in input pixels between the output pixel and this neighbor.
				float2 dPP = float2(kOffsets3x3[NeightborId]) - dKO;

				FLATTEN
				if (dot(dPP, dPP) < (DistThreshold * DistThreshold))
				{
					NeighborMin = MinPayload(NeighborMin, Neighbors[NeightborId]);
					NeighborMax = MaxPayload(NeighborMax, Neighbors[NeightborId]);
				}
			}
		}
	}
	#elif AA_HISTORY_CLAMPING_BOX == HISTORY_CLAMPING_BOX_MIN_MAX
	{
		// Min / max of the plus pattern: neighbors 1, 3, 4, 5 and 7.
		NeighborMin = MinPayload3( Neighbors[1], Neighbors[3], Neighbors[4] );
		NeighborMin = MinPayload3( NeighborMin, Neighbors[5], Neighbors[7] );

		NeighborMax = MaxPayload3( Neighbors[1], Neighbors[3], Neighbors[4] );
		NeighborMax = MaxPayload3( NeighborMax, Neighbors[5], Neighbors[7] );

		#if AA_SAMPLES == 6
		{
			float2 PPCo = InputParams.ViewportUV * InputViewSize.xy + TemporalJitterPixels;
			float2 PPCk = floor(PPCo) + 0.5;
			float2 dKO = PPCo - PPCk;

			// Additional neighbor, in the direction of the output pixel.
			int2 FifthNeighborOffset = SignFastInt(dKO);

			FTAASceneColorSample FifthSample = SampleCachedSceneColorTexture(InputParams, FifthNeighborOffset);

			FTAAHistoryPayload FifthNeighbor;
			FifthNeighbor.Color = FifthSample.Color;
			FifthNeighbor.CocRadius = FifthSample.CocRadius;

			NeighborMin = MinPayload(NeighborMin, FifthNeighbor);
			NeighborMax = MaxPayload(NeighborMax, FifthNeighbor);
		}
		#elif AA_SAMPLES == 9
		{
			FTAAHistoryPayload NeighborMinPlus = NeighborMin;
			FTAAHistoryPayload NeighborMaxPlus = NeighborMax;

			// Extend the box to the corners: neighbors 0, 2, 6 and 8.
			NeighborMin = MinPayload3( NeighborMin, Neighbors[0], Neighbors[2] );
			NeighborMin = MinPayload3( NeighborMin, Neighbors[6], Neighbors[8] );

			NeighborMax = MaxPayload3( NeighborMax, Neighbors[0], Neighbors[2] );
			NeighborMax = MaxPayload3( NeighborMax, Neighbors[6], Neighbors[8] );

			if( AA_ROUND )
			{
				// Average of the plus box and of the full 3x3 box.
				NeighborMin = AddPayload(MulPayload(NeighborMin, 0.5), MulPayload(NeighborMinPlus, 0.5));
				NeighborMax = AddPayload(MulPayload(NeighborMax, 0.5), MulPayload(NeighborMaxPlus, 0.5));
			}
		}
		#endif
	}
	#else
		#error Unknown history clamping box.
	#endif

	OutNeighborMin = NeighborMin;
	OutNeighborMax = NeighborMax;
}


/**
 * Samples the history at HistoryScreenPosition.
 *
 * @param HistoryScreenPosition  Screen position of the pixel in the previous frame.
 * @return History payload, exposure corrected and transformed with TransformSceneColor().
 */
FTAAHistoryPayload SampleHistory(in float2 HistoryScreenPosition)
{
	float4 RawHistory0 = 0;
	float4 RawHistory1 = 0;

	// Sample the history using Catmull-Rom to reduce blur on motion.
	#if AA_BICUBIC
	{
		float2 HistoryBufferUV = HistoryScreenPosition * ScreenPosToHistoryBufferUV.xy + ScreenPosToHistoryBufferUV.zw;

		// Clamp HistoryBufferUV so that we don't have to do it entirely for each samples.
		#if AA_MANUALLY_CLAMP_HISTORY_UV
			HistoryBufferUV = clamp(HistoryBufferUV, HistoryBufferUVMinMax.xy, HistoryBufferUVMinMax.zw);
		#endif

		FCatmullRomSamples Samples = GetBicubic2DCatmullRomSamples(HistoryBufferUV, HistoryBufferSize.xy, HistoryBufferSize.zw);

		UNROLL
		for (uint i = 0; i < Samples.Count; i++)
		{
			float2 SampleUV = Samples.UV[i];

			// Clamp SampleUV within HistoryBufferUVMinMax to avoid sampling potential NaN outside view rect.
			// This may look expensive, but Samples.UVDir is actually compile time constant to give a hint on what and how each component can be optimally clamped.
			if (AA_MANUALLY_CLAMP_HISTORY_UV)
			{
				if (Samples.UVDir[i].x < 0)
				{
					SampleUV.x = max(SampleUV.x, HistoryBufferUVMinMax.x);
				}
				else if (Samples.UVDir[i].x > 0)
				{
					SampleUV.x = min(SampleUV.x, HistoryBufferUVMinMax.z);
				}

				if (Samples.UVDir[i].y < 0)
				{
					SampleUV.y = max(SampleUV.y, HistoryBufferUVMinMax.y);
				}
				else if (Samples.UVDir[i].y > 0)
				{
					SampleUV.y = min(SampleUV.y, HistoryBufferUVMinMax.w);
				}
			}

			RawHistory0 += HistoryBuffer_0.SampleLevel(HistoryBufferSampler_0, SampleUV, 0) * Samples.Weight[i];
		}
		RawHistory0 *= Samples.FinalMultiplier;
	}
	// Sample the history using bilinear sampler.
	#else
	{
		// Clamp HistoryScreenPosition to be within viewport.
		if (AA_MANUALLY_CLAMP_HISTORY_UV)
		{
			HistoryScreenPosition = clamp(HistoryScreenPosition, -ScreenPosAbsMax, ScreenPosAbsMax);
		}

		float2 HistoryBufferUV = HistoryScreenPosition * ScreenPosToHistoryBufferUV.xy + ScreenPosToHistoryBufferUV.zw;

		RawHistory0 = HistoryBuffer_0.SampleLevel(HistoryBufferSampler_0, HistoryBufferUV, 0);
	}
	#endif

	// The second history render target is always sampled with a single SampleLevel().
	#if HISTORY_RENDER_TARGETS == 2
	{
		if (AA_MANUALLY_CLAMP_HISTORY_UV)
		{
			HistoryScreenPosition = clamp(HistoryScreenPosition, -ScreenPosAbsMax, ScreenPosAbsMax);
		}

		float2 HistoryBufferUV = HistoryScreenPosition * ScreenPosToHistoryBufferUV.xy + ScreenPosToHistoryBufferUV.zw;

		RawHistory1 = HistoryBuffer_1.SampleLevel(HistoryBufferSampler_1, HistoryBufferUV, 0);
	}
	#endif

	FTAAHistoryPayload HistoryPayload;
	HistoryPayload.Color = RawHistory0;

	#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
		HistoryPayload.CocRadius = RawHistory1.r;
	#else
		HistoryPayload.CocRadius = RawHistory0.a;
	#endif

	#if COMPILER_SUPPORTS_HLSL2021
		CorrectExposure(HistoryPayload.Color.rgb);
	#else
		HistoryPayload.Color.rgb *= HistoryPreExposureCorrection;
	#endif

	HistoryPayload.Color = TransformSceneColor(HistoryPayload.Color);

	return HistoryPayload;
}


/**
 * Constrains the history to the neighborhood bounding box.
 *
 * @param IntermediaryResult  Filtered is the target the history is clipped towards with AA_CLIP.
 * @param History             History payload to constrain.
 * @param NeighborMin         Lower bound of the neighborhood bounding box.
 * @param NeighborMax         Upper bound of the neighborhood bounding box.
 * @return The history unchanged without AA_CLAMP, clipped with AA_CLIP, clamped otherwise.
 */
FTAAHistoryPayload ClampHistory(inout FTAAIntermediaryResult IntermediaryResult, FTAAHistoryPayload History, FTAAHistoryPayload NeighborMin, FTAAHistoryPayload NeighborMax)
{
	#if !AA_CLAMP
		return History;

	#elif AA_CLIP
		// Clip history, this uses color AABB intersection for tighter fit.
		//float4 TargetColor = 0.5 * ( NeighborMin.Color + NeighborMax.Color );
		float4 TargetColor = IntermediaryResult.Filtered.Color;

		float ClipBlend = HistoryClip( History.Color.rgb, TargetColor.rgb, NeighborMin.Color.rgb, NeighborMax.Color.rgb );
		//float DistToClamp = saturate(-ClipBlend) / ( saturate(-ClipBlend) + 1 );
		//float DistToClamp = abs( ClipBlend ) / ( 1 - ClipBlend );

		ClipBlend = saturate( ClipBlend );

		History.Color = lerp( History.Color, TargetColor, ClipBlend );

		// Move the rest of the payload towards the filtered payload by the same amount as the color.
		History.CocRadius = lerp( History.CocRadius, IntermediaryResult.Filtered.CocRadius, ClipBlend );

		#if AA_FORCE_ALPHA_CLAMP
			History.Color.a = clamp( History.Color.a, NeighborMin.Color.a, NeighborMax.Color.a );
		#endif

		return History;

	#else //!AA_CLIP
		History.Color = clamp(History.Color, NeighborMin.Color, NeighborMax.Color);
		History.CocRadius = clamp(History.CocRadius, NeighborMin.CocRadius, NeighborMax.CocRadius);
		return History;
	#endif
}


//------------------------------------------------------- TAA MAIN FUNCTION

/**
 * Computes the temporal AA output of one output pixel.
 *
 * Steps: setup of the input parameters, reprojection of the pixel into the previous frame,
 * filtering of the current frame, history sampling and clamping, and final blend.
 *
 * @param GroupId             Thread group id.
 * @param GroupThreadId       2D thread id in the group.
 * @param GroupThreadIndex    Linear thread index in the group.
 * @param ViewportUV          UV of the output pixel in the output viewport.
 * @param FrameExposureScale  Exposure scale of the frame (multiplied by View.OneOverPreExposure).
 * @return Payload to store in the new history, transformed back to raw linear scene color.
 */
FTAAHistoryPayload TemporalAASample(uint2 GroupId, uint2 GroupThreadId, uint GroupThreadIndex, float2 ViewportUV, float FrameExposureScale)
{
	// SETUP
	// -----
	FTAAInputParameters InputParams;

	// Per frame setup.
	InputParams.FrameExposureScale = ToScalarMemory(FrameExposureScale * View.OneOverPreExposure);

	// Per pixel setup.
	{
		InputParams.GroupId = GroupId;
		InputParams.GroupThreadId = GroupThreadId;
		InputParams.GroupThreadIndex = GroupThreadIndex;
		InputParams.ViewportUV = ViewportUV;
		InputParams.ScreenPos = ViewportUVToScreenPos(ViewportUV);
		InputParams.NearestBufferUV = ViewportUV * ViewportUVToInputBufferUV.xy + ViewportUVToInputBufferUV.zw;

		// Handle single or multi-pass responsive AA
		#if AA_SINGLE_PASS_RESPONSIVE
		{
			const uint kResponsiveStencilMask = 1 << 3;

			int2 SceneStencilUV = (int2)trunc(InputParams.NearestBufferUV * InputSceneColorSize.xy);
			uint SceneStencilRef = StencilTexture.Load(int3(SceneStencilUV, 0)) STENCIL_COMPONENT_SWIZZLE;

			InputParams.bIsResponsiveAAPixel = (SceneStencilRef & kResponsiveStencilMask) ? 1.f : 0.f;
		}
		#elif TAA_RESPONSIVE
			InputParams.bIsResponsiveAAPixel = 1.f;
		#else
			InputParams.bIsResponsiveAAPixel = 0.f;
		#endif

		#if AA_UPSAMPLE
		{
			// Pixel coordinate of the center of output pixel O in the input viewport.
			float2 PPCo = ViewportUV * InputViewSize.xy + TemporalJitterPixels;

			// Pixel coordinate of the center of the nearest input pixel K.
			float2 PPCk = floor(PPCo) + 0.5;

			// Pixel coordinate of the center of the nearest top left input pixel T.
			float2 PPCt = floor(PPCo - 0.5) + 0.5;

			InputParams.NearestBufferUV = InputSceneColorSize.zw * (InputViewMin + PPCk);
			InputParams.NearestTopLeftBufferUV = InputSceneColorSize.zw * (InputViewMin + PPCt);
			// TODO: because use nearest sampler, can be faster in this computation.
		}
		#endif
	}

	// Setup intermediary results.
	FTAAIntermediaryResult IntermediaryResult = CreateIntermediaryResult();

	// FIND MOTION OF PIXEL AND NEAREST IN NEIGHBORHOOD
	// ------------------------------------------------
	float3 PosN; // Position of this pixel, possibly later nearest pixel in neighborhood.
	PosN.xy = InputParams.ScreenPos;

	PrecacheInputSceneDepth(InputParams);
	PosN.z = SampleCachedSceneDepthTexture(InputParams, int2(0, 0));

	// Screen position of minimum depth.
	float2 VelocityOffset = float2(0.0, 0.0);
	#if AA_CROSS // TODO: 2x2.
	{
		// For motion vector, use camera/dynamic motion from min depth pixel in pattern around pixel.
		// This enables better quality outline on foreground against different motion background.
		// Larger 2 pixel distance "x" works best (because AA dilates surface).
		float4 Depths;
		Depths.x = SampleCachedSceneDepthTexture(InputParams, int2(-AA_CROSS, -AA_CROSS));
		Depths.y = SampleCachedSceneDepthTexture(InputParams, int2( AA_CROSS, -AA_CROSS));
		Depths.z = SampleCachedSceneDepthTexture(InputParams, int2(-AA_CROSS,  AA_CROSS));
		Depths.w = SampleCachedSceneDepthTexture(InputParams, int2( AA_CROSS,  AA_CROSS));

		float2 DepthOffset = float2(AA_CROSS, AA_CROSS);
		float DepthOffsetXx = float(AA_CROSS);
		#if HAS_INVERTED_Z_BUFFER
			// Nearest depth is the largest depth (depth surface 0=far, 1=near).
			if(Depths.x > Depths.y)
			{
				DepthOffsetXx = -AA_CROSS;
			}
			if(Depths.z > Depths.w)
			{
				DepthOffset.x = -AA_CROSS;
			}
			float DepthsXY = max(Depths.x, Depths.y);
			float DepthsZW = max(Depths.z, Depths.w);
			if(DepthsXY > DepthsZW)
			{
				DepthOffset.y = -AA_CROSS;
				DepthOffset.x = DepthOffsetXx;
			}
			float DepthsXYZW = max(DepthsXY, DepthsZW);
			if(DepthsXYZW > PosN.z)
			{
				// This is offset for reading from velocity texture.
				// This supports half or fractional resolution velocity textures.
				// With the assumption that UV position scales between velocity and color.
				VelocityOffset = DepthOffset * InputSceneColorSize.zw;
				// This is [0 to 1] flipped in Y.
				//PosN.xy = ScreenPos + DepthOffset * OutputViewportSize.zw * 2.0;
				PosN.z = DepthsXYZW;
			}
		#else // !HAS_INVERTED_Z_BUFFER
			#error Fix me!
		#endif // !HAS_INVERTED_Z_BUFFER
	}
	#endif // AA_CROSS

	// Camera motion for pixel or nearest pixel (in ScreenPos space).
	bool OffScreen = false;
	float Velocity = 0;
	float HistoryBlur = 0;
	float2 HistoryScreenPosition = InputParams.ScreenPos;

	#if 1
	{
		// Reproject the pixel into the previous frame using camera motion only.
		float4 ThisClip = float4( PosN.xy, PosN.z, 1 );
		float4 PrevClip = mul( ThisClip, View.ClipToPrevClip );
		float2 PrevScreen = PrevClip.xy / PrevClip.w;
		float2 BackN = PosN.xy - PrevScreen;

		float2 BackTemp = BackN * OutputViewportSize.xy;

		#if AA_DYNAMIC
		{
			// Use the velocity texture's motion instead when the encoded velocity's x is > 0.
			ENCODED_VELOCITY_TYPE EncodedVelocity = SampleVelocityTexture(InputParams.NearestBufferUV + VelocityOffset);
			bool DynamicN = EncodedVelocity.x > 0.0;
			if(DynamicN)
			{
				BackN = DecodeVelocityFromTexture(EncodedVelocity).xy;
			}
			BackTemp = BackN * OutputViewportSize.xy;
		}
		#endif

		Velocity = sqrt(dot(BackTemp, BackTemp));

		#if !AA_BICUBIC
			// Save the amount of pixel offset of just camera motion, used later as the amount of blur introduced by history.
			float HistoryBlurAmp = 2.0;
			HistoryBlur = saturate(abs(BackTemp.x) * HistoryBlurAmp + abs(BackTemp.y) * HistoryBlurAmp);
		#endif

		// Easier to do off screen check before conversion.
		// BackN is in units of 2pixels/viewportWidthInPixels
		// This converts back projection vector to [-1 to 1] offset in viewport.
		HistoryScreenPosition = InputParams.ScreenPos - BackN;

		// Detect if HistoryBufferUV would be outside of the viewport.
		OffScreen = max(abs(HistoryScreenPosition.x), abs(HistoryScreenPosition.y)) >= 1.0;
	}
	#endif

	// Precache input scene color.
	PrecacheInputSceneColor(/* inout = */ InputParams);

	// Filter input.
	#if AA_UPSAMPLE_ADAPTIVE_FILTERING == 0
		FilterCurrentFrameInputSamples(
			InputParams,
			/* inout = */ IntermediaryResult);
	#endif

	// Compute neighborhood bounding box.
	FTAAHistoryPayload NeighborMin;
	FTAAHistoryPayload NeighborMax;

	ComputeNeighborhoodBoundingbox(
		InputParams,
		/* inout = */ IntermediaryResult,
		NeighborMin, NeighborMax);

	// Sample history.
	FTAAHistoryPayload History = SampleHistory(HistoryScreenPosition);

	// Whether the feedback needs to be reset.
	bool IgnoreHistory = OffScreen || bCameraCut;

	// DYNAMIC ANTI GHOSTING
	// ---------------------
	#if AA_DYNAMIC_ANTIGHOST && AA_DYNAMIC && HISTORY_PAYLOAD_COMPONENTS == 3
	bool Dynamic4;
	{
		#if !AA_DYNAMIC
			#error AA_DYNAMIC_ANTIGHOST requires AA_DYNAMIC
		#endif
		// TODO: try a 2x2 for AA_UPSAMPLE
		bool Dynamic1 = SampleVelocityTexture(InputParams.NearestBufferUV, int2( 0, -1)).x > 0;
		bool Dynamic3 = SampleVelocityTexture(InputParams.NearestBufferUV, int2(-1,  0)).x > 0;
		Dynamic4 = SampleVelocityTexture(InputParams.NearestBufferUV).x > 0;
		bool Dynamic5 = SampleVelocityTexture(InputParams.NearestBufferUV, int2( 1,  0)).x > 0;
		bool Dynamic7 = SampleVelocityTexture(InputParams.NearestBufferUV, int2( 0,  1)).x > 0;

		bool Dynamic = Dynamic1 || Dynamic3 || Dynamic4 || Dynamic5 || Dynamic7;

		// History's alpha stores whether the pixel was dynamic (see the end of this function):
		// reject the history when it was, while nothing in the plus neighborhood is anymore.
		IgnoreHistory = IgnoreHistory || (!Dynamic && History.Color.a > 0);
	}
	#endif

	// Save off luma of history before the clamp.
	float LumaMin = GetSceneColorLuma4(NeighborMin.Color);
	float LumaMax = GetSceneColorLuma4(NeighborMax.Color);
	float LumaHistory = GetSceneColorLuma4(History.Color);

	// Clamp history.
	FTAAHistoryPayload PreClampingHistoryColor = History;
	History = ClampHistory(IntermediaryResult, History, NeighborMin, NeighborMax);

	// Filter input after color clamping.
	#if AA_UPSAMPLE_ADAPTIVE_FILTERING == 1
	{
		#if AA_VARIANCE
			#error AA_VARIANCE and AA_UPSAMPLE_ADAPTIVE_FILTERING are not compatible because of circular code dependency.
		#endif

		if (IgnoreHistory) // || any(HistoryColor != PreClampingHistoryColor))
		{
			// Set the input filter infinitely large when we know need to rely on it.
			IntermediaryResult.InvFilterScaleFactor = 0;
		}

		// Blur input according to input pixel velocity to reduce blocky filtering cause by narrow filter on low screen percentage.
		// Multiplied by upscale factor because Velocity is in output resolution based pixel velocity.
		IntermediaryResult.InvFilterScaleFactor -= (Velocity * UpscaleFactor) * 0.1;

		// Set a minimal filtering scale to screen percentage to not unecessarily blur the input more than the screen percentage.
		IntermediaryResult.InvFilterScaleFactor = max(IntermediaryResult.InvFilterScaleFactor, ScreenPercentage);

		FilterCurrentFrameInputSamples(
			InputParams,
			/* inout = */ IntermediaryResult);
	}
	#endif

	// ADD BACK IN ALIASING TO SHARPEN
	// -------------------------------
	#if AA_FILTERED && !AA_BICUBIC
	{
		#if AA_UPSAMPLE
			#error Temporal upsample does not support sharpen.
		#endif

		// Blend in non-filtered based on the amount of sub-pixel motion.
		float AddAliasing = saturate(HistoryBlur) * 0.5;
		float LumaContrastFactor = 32.0;
		#if AA_YCOCG
			// TODO: Probably a bug arround here because using Luma4() even with YCOCG=0.
			// 1/4 as bright.
			LumaContrastFactor *= 4.0;
		#endif
		float LumaContrast = LumaMax - LumaMin;
		AddAliasing = saturate(AddAliasing + rcp(1.0 + LumaContrast * LumaContrastFactor));
		IntermediaryResult.Filtered.Color = lerp(IntermediaryResult.Filtered.Color, SampleCachedSceneColorTexture(InputParams, int2(0, 0)).Color, AddAliasing);
	}
	#endif

	// COMPUTE BLEND AMOUNT
	// --------------------
	float BlendFinal;
	{
		float LumaFiltered = GetSceneColorLuma4(IntermediaryResult.Filtered.Color);

		BlendFinal = IntermediaryResult.FilteredTemporalWeight * CurrentFrameWeight;

		// Rely more on the current frame as the pixel velocity increases.
		BlendFinal = lerp(BlendFinal, 0.2, saturate(Velocity / 40));

		#if 0
		{
			// Anti-flicker
			float DistToClamp = 2 * abs(min(LumaHistory - LumaMin, LumaMax - LumaHistory) / (LumaMax - LumaMin));
			//BlendFinal *= lerp( 0, 1, saturate(4 * DistToClamp) );
			BlendFinal += 0.8 * saturate(0.02 * LumaHistory / abs(Filtered.x - LumaHistory));
			BlendFinal *= (LumaMin * InExposureScale + 0.5) / (LumaMax * InExposureScale + 0.5);
		}
		#endif

		// Make sure to have at least some small contribution
		BlendFinal = max( BlendFinal, saturate( 0.01 * LumaHistory / abs( LumaFiltered - LumaHistory ) ) );

		#if AA_NAN && (COMPILER_GLSL || COMPILER_METAL)
			// The current Metal & GLSL compilers don't handle saturate(NaN) -> 0, instead they return NaN/INF.
			BlendFinal = -min(-BlendFinal, 0.0);
		#endif

		// Responsive forces 1/4 of new frame.
		BlendFinal = InputParams.bIsResponsiveAAPixel ? (1.0/4.0) : BlendFinal;

		#if AA_LERP
			BlendFinal = 1.0/float(AA_LERP);
		#endif

		#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC || AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
		{
			float BilateralWeight = ComputeBilateralWeight(IntermediaryResult.Filtered.CocRadius, History.CocRadius);

			BlendFinal = lerp(BlendFinal, 1, (1-BilateralWeight)*CoCBilateralFilterStrength);
		}
		#endif

		if (bCameraCut)
		{
			BlendFinal = 1.0;
		}
	}

	// Offscreen feedback resets.
	if (IgnoreHistory)
	{
		History = IntermediaryResult.Filtered;
		#if HISTORY_PAYLOAD_COMPONENTS == 3
			History.Color.a = 0.0;
		#endif
	}

	// DO FINAL BLEND BETWEEN HISTORY AND FILTERED COLOR
	// -------------------------------------------------
	// Luma weighted blend
	float FilterWeight = GetSceneColorHdrWeight(InputParams, IntermediaryResult.Filtered.Color.x);
	float HistoryWeight = GetSceneColorHdrWeight(InputParams, History.Color.x);

	FTAAHistoryPayload OutputPayload;
	{
		float2 Weights = WeightedLerpFactors(HistoryWeight, FilterWeight, BlendFinal);
		OutputPayload = AddPayload(MulPayload(History, Weights.x), MulPayload(IntermediaryResult.Filtered, Weights.y));
	}

	// Ensure that alpha values that are expected to be opaque (but are only close to opaque) are forced to be opaque.
	// (0.995 chosen to accomodate handling of 254/255)
	if (OutputPayload.Color.a > 0.995)
	{
		OutputPayload.Color.a = 1;
	}

	OutputPayload.Color = TransformBackToRawLinearSceneColor(OutputPayload.Color);

	#if AA_NAN
		// Transform NaNs to black, transform negative colors to black.
		OutputPayload.Color = -min(-OutputPayload.Color, 0.0);
		OutputPayload.CocRadius = isnan(OutputPayload.CocRadius) ? 0.0 : OutputPayload.CocRadius;
	#endif

	#if HISTORY_PAYLOAD_COMPONENTS == 3
		#if AA_DYNAMIC_ANTIGHOST && AA_DYNAMIC
			// Store whether the pixel is dynamic in alpha, for the next frame's anti ghosting.
			OutputPayload.Color.a = Dynamic4 ? 1 : 0;
		#else
			// Zero out to remove any prior computation of alpha
			OutputPayload.Color.a = 0;
		#endif
	#endif

	return OutputPayload;
}


//------------------------------------------------------- ENTRY POINTS

#if COMPUTESHADER

/**
 * Compute shader entry point: one thread per output pixel.
 *
 * Writes OutComputeTex_0 (and OutComputeTex_1 with two history render targets) for the pixels
 * within OutputViewportRect, and optionally a half resolution output with TAA_DOWNSAMPLE.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void MainCS(
	uint2 DispatchThreadId : SV_DispatchThreadID,
	uint2 GroupId : SV_GroupID,
	uint2 GroupThreadId : SV_GroupThreadID,
	uint GroupThreadIndex : SV_GroupIndex)
{
	float2 ViewportUV = (float2(DispatchThreadId) + 0.5f) * OutputViewportSize.zw;

	#if AA_LOWER_RESOLUTION
	{
		ViewportUV = (float2(DispatchThreadId) + 0.5f) * MaxViewportUVAndSvPositionToViewportUV.zw;
		ViewportUV = min(ViewportUV, MaxViewportUVAndSvPositionToViewportUV.xy);
	}
	#endif

	float FrameExposureScale = EyeAdaptationLookup();
	FTAAHistoryPayload OutputPayload = TemporalAASample(GroupId, GroupThreadId, GroupThreadIndex, ViewportUV, FrameExposureScale);

	// Pack the payload into the output render targets according to the history payload layout.
	float4 OutColor0 = 0;
	float4 OutColor1 = 0;

	#if AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_COC
	{
		OutColor0.rgb = OutputPayload.Color.rgb;
		OutColor0.a = OutputPayload.CocRadius;
	}
	#elif AA_HISTORY_PAYLOAD == HISTORY_PAYLOAD_RGB_OPACITY_COC
	{
		OutColor0 = OutputPayload.Color;
		OutColor1.r = OutputPayload.CocRadius;
	}
	#else
	{
		OutColor0 = OutputPayload.Color;
	}
	#endif

	uint2 PixelPos = DispatchThreadId + OutputViewportRect.xy;
	if (all(PixelPos < OutputViewportRect.zw))
	{
		// Clamp to the largest half float to not output INF.
		float4 FinalOutput0 = min(MaxHalfFloat.xxxx, OutColor0);

		#if AA_ENABLE_STOCASTIC_QUANTIZATION
		{
			FinalOutput0.rgb = QuantizeFloatColor(FinalOutput0.rgb, OutputQuantizationError, PixelPos, QUANTIZE_NOISE_HAMMERSLEY);
		}
		#endif

		OutComputeTex_0[PixelPos] = FinalOutput0;

		#if HISTORY_RENDER_TARGETS == 2
			OutComputeTex_1[PixelPos] = OutColor1;
		#endif
	}

	#if TAA_DOWNSAMPLE
	{
		// This shader permutation outputs half resolution image in addition to main full-res one.
		// It is more efficient than performing a separate downsampling pass afterwards.
		// Assumes output resolution is always even.

		// Indexes in the group shared array of the 2x2 quad whose top left pixel is this thread's.
		uint P0 = GroupThreadId.x + GroupThreadId.y * THREADGROUP_SIZEX;
		uint P1 = P0 + 1;
		uint P2 = P0 + THREADGROUP_SIZEX;
		uint P3 = P2 + 1;

		GroupSharedDownsampleArray[P0] = OutColor0;

		GroupMemoryBarrierWithGroupSync();

		// Only the threads with even X and even Y output the average of their 2x2 quad.
		if (((GroupThreadId.x | GroupThreadId.y) & 1) == 0)
		{
			OutComputeTexDownsampled[PixelPos / 2] =
				(OutColor0 + GroupSharedDownsampleArray[P1] + GroupSharedDownsampleArray[P2] + GroupSharedDownsampleArray[P3]) * 0.25;
		}
	}
	#endif //TAA_DOWNSAMPLE
}

#elif PIXELSHADER

// Mobile Only, supports Main and MainUpsampling
// Pixel shader entry point: there is no thread group, so the group ids are all zeros.
void MainPS(
	float4 SvPosition : SV_POSITION,
	out float4 OutColor0 : SV_Target0
	)
{
	float2 ViewportUV = (SvPosition.xy - OutputViewportRect.xy) * OutputViewportSize.zw;
	uint2 GroupId = int2(0, 0);
	uint2 GroupThreadId = int2(0, 0);
	uint GroupThreadIndex = 0;

	float FrameExposureScale = EyeAdaptationLookup();

	FTAAHistoryPayload OutputPayload = TemporalAASample(GroupId, GroupThreadId, GroupThreadIndex, ViewportUV, FrameExposureScale);

	OutColor0 = OutputPayload.Color;
}

#endif