// Copyright Epic Games, Inc. All Rights Reserved.

#include "TSRKernels.ush"
#include "TSRSpatialAntiAliasing.ush"
#include "TSRColorSpace.ush"
#include "TSRReprojectionField.ush"


//------------------------------------------------------- DEBUG

#define DEBUG_ARRAY_SIZE 8


//------------------------------------------------------- ENUMS

/** Shape of the scene color kernel. */
	// One single sample
	#define CONFIG_SAMPLES_1X1 0

	// Full 3x3
	#define CONFIG_SAMPLES_3X3 1

	// 3x3 plus
	#define CONFIG_SAMPLES_PLUS 2

	// 3x3 plus with a 6th sample in the closest corner.
	#define CONFIG_SAMPLES_PLUS_AND_CORNER 3

	// 3x3 plus but disable the furthest samples for the clamping box.
	#define CONFIG_SAMPLES_PLUS_DISABLE_FAR 4

	// 3x3 plus but move the furthest samples towards the closest corner.
	#define CONFIG_SAMPLES_PLUS_MOVE_FAR 5


//------------------------------------------------------- CONFIG

#if DIM_UPDATE_QUALITY == 0 // Low
	#define CONFIG_SAMPLES CONFIG_SAMPLES_PLUS
	#define CONFIG_REJECTION_ANTI_ALIASING 0

#elif DIM_UPDATE_QUALITY == 1 // Medium
	#define CONFIG_SAMPLES CONFIG_SAMPLES_PLUS
	#define CONFIG_REJECTION_ANTI_ALIASING 1

#elif DIM_UPDATE_QUALITY == 2 // High
	#define CONFIG_SAMPLES CONFIG_SAMPLES_PLUS_MOVE_FAR
	#define CONFIG_REJECTION_ANTI_ALIASING 1

#elif DIM_UPDATE_QUALITY == 3 // Epic
	#define CONFIG_SAMPLES CONFIG_SAMPLES_PLUS_MOVE_FAR
	#define CONFIG_REJECTION_ANTI_ALIASING 1

#else
	#error Unknown history update quality
#endif


//------------------------------------------------------- DEFAULTS

/** Controls whether the shader is capable of doing history resurrection. */
#define CONFIG_HISTORY_RESURRECTION 1

/** Controls whether the shader is capable of using the reprojection field. */
#define CONFIG_REPROJECTION_FIELD 1

/** Whether lens distortion is supported. */
#define CONFIG_LENS_DISTORTION TSR_SUPPORT_LENS_DISTORTION

/** Whether the history should do any rejection of any kind. */
#define CONFIG_CLAMP 1

/** Defines the number of samples of the scene color.
 *
 * CONFIG_SAMPLES_PLUS_AND_CORNER is the only kernel shape with a 6th sample (SampleId == 5 in
 * ComputeInputKernelSamplePosition()); every other "plus" shape uses 5 samples.
 */
#if CONFIG_SAMPLES == CONFIG_SAMPLES_1X1
	#define CONFIG_SAMPLES_COUNT 1
#elif CONFIG_SAMPLES == CONFIG_SAMPLES_3X3
	#define CONFIG_SAMPLES_COUNT 9
#elif CONFIG_SAMPLES == CONFIG_SAMPLES_PLUS_AND_CORNER
	// Was testing an undefined CONFIG_SAMPLES_PLUS_6, which the preprocessor evaluates as 0 and therefore never matched.
	#define CONFIG_SAMPLES_COUNT 6
#else
	#define CONFIG_SAMPLES_COUNT 5
#endif

/** Defines whether the history reprojection can be skipped completely when offscreen or fully rejected. */
#define CONFIG_SELECTIVE_REPROJECTION 1

/** Controls the number of pixels processed per lane. */
#if CONFIG_COMPILE_FP16
	// Take advantage of RDNA's v_pk_*_{uif}16 instructions
	#define CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION 1
#else
	#define CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION 0
#endif

#ifndef PLATFORM_SPECIFIC_ISOLATE
	#define PLATFORM_SPECIFIC_ISOLATE ISOLATE
#endif


//------------------------------------------------------- CONSTANTS

/** Width and height, in pixels, of the tile processed by one thread group. */
#define TILE_SIZE 8

/** Number of channels of the history metadata; selects the tsr_halfM / tsr_halfMx2 aliases below. */
#define CONFIG_METADATA_CHANNELS 1

#if CONFIG_METADATA_CHANNELS == 2
	#define tsr_halfM tsr_half2
	#define tsr_halfMx2 tsr_half2x2
#elif CONFIG_METADATA_CHANNELS == 1
	#define tsr_halfM tsr_half
	#define tsr_halfMx2 tsr_half2
#else
	#error Unknown CONFIG_METADATA_CHANNELS
#endif


//------------------------------------------------------- PARAMETERS

// NOTE(review): the resource declarations below carry no element type (e.g. Texture2D<...>);
// confirm against the upstream file whether these were lost from this copy.
Texture2D InputSceneColorTexture;
Texture2D HistoryRejectionTexture;
#if CONFIG_REPROJECTION_FIELD
	Texture2D ReprojectionBoundaryTexture;
	Texture2D ReprojectionJacobianTexture;
#endif
Texture2D ReprojectionVectorTexture;
Texture2D AntiAliasingTexture;
#if CONFIG_LENS_DISTORTION
	Texture2D PrevDistortingDisplacementTexture;
	Texture2D ResurrectedDistortingDisplacementTexture;
	Texture2D UndistortingDisplacementTexture;
	float DistortionOverscan;
#endif

FScreenTransform HistoryPixelPosToViewportUV;
FScreenTransform ViewportUVToInputPPCo;
FScreenTransform HistoryPixelPosToScreenPos;
FScreenTransform HistoryPixelPosToInputPPCo;
float3 HistoryQuantizationError;
float HistorySampleCount;
float HistoryHisteresis;
float WeightClampingRejection;
float WeightClampingPixelSpeedAmplitude;
float InvWeightClampingPixelSpeed;
float InputToHistoryFactor;
float ResurrectionFrameIndex;
float PrevFrameIndex;
#if CONFIG_LENS_DISTORTION
	uint bLensDistortion;
#endif
#if CONFIG_REPROJECTION_FIELD
	uint bReprojectionField;
#endif
uint bGenerateOutputMip1;
uint bGenerateOutputMip2;
uint bGenerateOutputMip3;
uint HistoryArrayIndices_HighFrequency;

Texture2DArray PrevHistoryColorTexture;
Texture2DArray PrevHistoryMetadataTexture;

RWTexture2DArray HistoryColorOutput;
RWTexture2DArray HistoryMetadataOutput;
RWTexture2DArray SceneColorOutputMip1;


//------------------------------------------------------- LDS

// One LDS entry per thread of the group: dual pixel vectorization halves the thread count.
#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
	#define GROUP_SHARED_SIZE (TILE_SIZE * TILE_SIZE / 2)
#else
	#define GROUP_SHARED_SIZE (TILE_SIZE * TILE_SIZE)
#endif

groupshared tsr_half4 SharedArray0[GROUP_SHARED_SIZE];
groupshared tsr_half4 SharedArray1[GROUP_SHARED_SIZE];
groupshared tsr_half4 SharedArray2[GROUP_SHARED_SIZE];
groupshared tsr_half4 SharedArray3[GROUP_SHARED_SIZE];
groupshared tsr_half4 SharedArray4[GROUP_SHARED_SIZE];
groupshared tsr_half4 SharedArray5[GROUP_SHARED_SIZE];
groupshared tsr_half4 SharedArray6[GROUP_SHARED_SIZE];
groupshared tsr_half4 SharedArray7[GROUP_SHARED_SIZE];
groupshared tsr_half4 SharedArray8[GROUP_SHARED_SIZE];
groupshared tsr_half4 SharedArray9[GROUP_SHARED_SIZE];
groupshared tsr_half4 SharedArray10[GROUP_SHARED_SIZE];
groupshared tsr_half4 SharedArray11[GROUP_SHARED_SIZE];


//------------------------------------------------------- WAVE

#if COMPILER_SUPPORTS_MED3

/** Row-wise med3() of three 3x2 matrices. */
CALL_SITE_DEBUGLOC
tsr_half3x2 med3(tsr_half3x2 a, tsr_half3x2 b, tsr_half3x2 c)
{
	return tsr_half3x2(
		med3(a[0], b[0], c[0]),
		med3(a[1], b[1], c[1]),
		med3(a[2], b[2], c[2]));
}

/** Row-wise med3() of three 4x2 matrices. */
CALL_SITE_DEBUGLOC
tsr_half4x2 med3(tsr_half4x2 a, tsr_half4x2 b, tsr_half4x2 c)
{
	return tsr_half4x2(
		med3(a[0], b[0], c[0]),
		med3(a[1], b[1], c[1]),
		med3(a[2], b[2], c[2]),
		med3(a[3], b[3], c[3]));
}

#endif

#if PLATFORM_SUPPORTS_WAVE_BROADCAST

/** Row-wise WaveBroadcast() of a 3x2 matrix. */
CALL_SITE_DEBUGLOC
tsr_half3x2 WaveBroadcast(const FWaveBroadcastSettings Settings,
tsr_half3x2 v)
{
	return tsr_half3x2(
		WaveBroadcast(Settings, v[0]),
		WaveBroadcast(Settings, v[1]),
		WaveBroadcast(Settings, v[2]));
}

/** Row-wise WaveBroadcast() of a 4x2 matrix. */
CALL_SITE_DEBUGLOC
tsr_half4x2 WaveBroadcast(const FWaveBroadcastSettings Settings, tsr_half4x2 v)
{
	return tsr_half4x2(
		WaveBroadcast(Settings, v[0]),
		WaveBroadcast(Settings, v[1]),
		WaveBroadcast(Settings, v[2]),
		WaveBroadcast(Settings, v[3]));
}

#endif


//------------------------------------------------------- FUNCTIONS

/**
 * Maps a group thread index to the tile-local coordinate(s) handled by the lane.
 *
 * With dual pixel vectorization the lane's second pixel is the first one offset by (0, 8 / 2);
 * otherwise the single coordinate is wrapped with dpv_interleave_mono_registers().
 */
CALL_SITE_DEBUGLOC
tsr_ushort2x2 Map8x8Tile2x2LaneDPV(uint GroupThreadIndex)
{
	tsr_ushort2 GroupId = Map8x8Tile2x2Lane(GroupThreadIndex);
#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
	return dpv_interleave_registers(GroupId, GroupId + tsr_ushort2(0, 8 / 2));
#else
	return dpv_interleave_mono_registers(GroupId);
#endif
}

/** Extracts the rgb channels of both pixels of a dual-pixel 4-channel color. */
CALL_SITE_DEBUGLOC
tsr_half3x2 GetColorChannel(tsr_half4x2 Color)
{
	return dpv_interleave_registers(
		dpv_lo(Color).rgb,
		dpv_hi(Color).rgb);
}

/**
 * Computes where sample SampleId of the scene color kernel selected by CONFIG_SAMPLES is fetched,
 * for both pixels of the lane.
 *
 * @param PixelPos             Position of the kernel's center pixel.
 * @param dKO                  Offset whose sign, and magnitude against 0.25, steers the kernel shape.
 *                             NOTE(review): presumably the output pixel center relative to the input pixel center
 *                             (cf. dInputKO in MainCS) - confirm with the caller.
 * @param SampleId             Index of the sample in the kernel.
 * @param PixelPosMin          Lower bound forwarded to ClampPixelOffset().
 * @param PixelPosMax          Upper bound forwarded to ClampPixelOffset().
 * @param SamplePixelPos       (out) Clamped pixel position of the sample.
 * @param PixelOffset          (out) Unclamped offset of the sample relative to PixelPos.
 * @param bIsDuplicatedSample  (out) Only set by CONFIG_SAMPLES_PLUS_MOVE_FAR: true for the last sample when it
 *                             gets the same offset as sample 3. False otherwise.
 * @param bIsDisabledSample    (out) Only set by CONFIG_SAMPLES_PLUS_DISABLE_FAR: true when the sample lies on the
 *                             side opposite to dKO along its axis and dKO is more than 0.25 away on that axis.
 *                             False otherwise.
 */
void ComputeInputKernelSamplePosition(
	tsr_short2x2 PixelPos,
	tsr_half2x2 dKO,
	const uint SampleId,
	int2 PixelPosMin,
	int2 PixelPosMax,
	out tsr_short2x2 SamplePixelPos,
	out tsr_half2x2 PixelOffset,
	out bool2 bIsDuplicatedSample,
	out bool2 bIsDisabledSample)
{
	// Sign of dKO as integer (iKOSign) and as float (KOSign): -1 when the sign bit is set, +1 otherwise.
	tsr_short2x2 iKOSign;
	tsr_half2x2 KOSign;
	#if CONFIG_COMPILE_FP16
		// (sign bit >> 14) is 2 for negative values, so 1 - 2 = -1.
		iKOSign[0] = int16_t(1) - int16_t2((asuint16(dKO[0]) & uint16_t(0x8000)) >> uint16_t(14));
		iKOSign[1] = int16_t(1) - int16_t2((asuint16(dKO[1]) & uint16_t(0x8000)) >> uint16_t(14));
		// Copies the sign bit of dKO onto the bit pattern of 1.0.
		KOSign[0] = asfloat16(asuint16(half(1.0)).xx | (asuint16(dKO[0]) & uint16_t(0x8000)));
		KOSign[1] = asfloat16(asuint16(half(1.0)).xx | (asuint16(dKO[1]) & uint16_t(0x8000)));
	#else
		iKOSign = dpv_interleave_registers(
			SignFastInt(dpv_lo(dKO)),
			SignFastInt(dpv_hi(dKO)));
		KOSign[0] = asfloat(asuint(1.0).xx | (asuint(dKO[0]) & uint(0x80000000)));
		KOSign[1] = asfloat(asuint(1.0).xx | (asuint(dKO[1]) & uint(0x80000000)));
	#endif

	bIsDuplicatedSample = false;
	bIsDisabledSample = false;

	#if CONFIG_SAMPLES == CONFIG_SAMPLES_3X3
	{
		tsr_short2 iPixelOffset = tsr_short2(kOffsets3x3[kSquareIndexes3x3[SampleId]]);
		PixelOffset = dpv_interleave_registers(tsr_half2(iPixelOffset), tsr_half2(iPixelOffset));
		SamplePixelPos = PixelPos + ClampPixelOffset(
			PixelPos,
			dpv_interleave_registers(iPixelOffset, iPixelOffset),
			iPixelOffset,
			PixelPosMin, PixelPosMax);
	}
	#elif CONFIG_SAMPLES == CONFIG_SAMPLES_PLUS_MOVE_FAR
	{
		// Sign of (|dKO| - 0.25) per axis: -1 when |dKO| < 0.25, +1 otherwise.
		tsr_half2x2 SquareShape;
		#if CONFIG_COMPILE_FP16
			SquareShape[0] = asfloat16(asuint16(half(1.0)).xx | (asuint16(abs(dKO[0]) - tsr_half(0.25).xx) & uint16_t(0x8000)));
			SquareShape[1] = asfloat16(asuint16(half(1.0)).xx | (asuint16(abs(dKO[1]) - tsr_half(0.25).xx) & uint16_t(0x8000)));
		#else
			// The sign bit of a 32-bit float is 0x80000000. The previous 0x8000 mask is the fp16 sign bit and
			// only selected a mantissa bit here, so SquareShape could never become -1.
			SquareShape[0] = asfloat(asuint(1.0).xx | (asuint(abs(dKO[0]) - float(0.25).xx) & uint(0x80000000)));
			SquareShape[1] = asfloat(asuint(1.0).xx | (asuint(abs(dKO[1]) - float(0.25).xx) & uint(0x80000000)));
		#endif

		PixelOffset = tsr_half(0);
		SamplePixelPos = PixelPos;
		if (SampleId == 0)
		{
			// NOP
		}
		else if (SampleId == 1)
		{
			// Neighbor along X, on the side of dKO.
			PixelOffset[0] = KOSign[0];
			SamplePixelPos[0] += tsr_short2(PixelOffset[0]);
			SamplePixelPos[0] = ClampPixelOffset(SamplePixelPos, PixelPosMin, PixelPosMax)[0];
		}
		else if (SampleId == 2)
		{
			// Neighbor along Y, on the side of dKO.
			PixelOffset[1] = KOSign[1];
			SamplePixelPos[1] += tsr_short2(PixelOffset[1]);
			SamplePixelPos[1] = ClampPixelOffset(SamplePixelPos, PixelPosMin, PixelPosMax)[1];
		}
		else if (SampleId == 3)
		{
			// SquareShape[0] == -1: neighbor along X on the side opposite to dKO.
			// SquareShape[0] == +1: moved to the corner on the side of dKO.
			PixelOffset[0] = SquareShape[0] * KOSign[0];
			PixelOffset[1] = saturate(SquareShape[0]) * KOSign[1];
			SamplePixelPos[0] += tsr_short2(PixelOffset[0]);
			SamplePixelPos[1] += tsr_short2(PixelOffset[1]);
			SamplePixelPos = ClampPixelOffset(SamplePixelPos, PixelPosMin, PixelPosMax);
		}
		else // if (SampleId == 4)
		{
			// Same as sample 3, driven by the Y axis.
			PixelOffset[0] = saturate(SquareShape[1]) * KOSign[0];
			PixelOffset[1] = SquareShape[1] * KOSign[1];
			// Both far samples moved to the same corner.
			bIsDuplicatedSample = (SquareShape[0] + SquareShape[1]) == tsr_half(2.0);
			SamplePixelPos[0] += tsr_short2(PixelOffset[0]);
			SamplePixelPos[1] += tsr_short2(PixelOffset[1]);
			SamplePixelPos = ClampPixelOffset(SamplePixelPos, PixelPosMin, PixelPosMax);
		}
	}
	#elif CONFIG_SAMPLES == CONFIG_SAMPLES_PLUS || CONFIG_SAMPLES == CONFIG_SAMPLES_PLUS_DISABLE_FAR || CONFIG_SAMPLES == CONFIG_SAMPLES_PLUS_AND_CORNER
	{
		#if CONFIG_SAMPLES == CONFIG_SAMPLES_PLUS_AND_CORNER
		if (SampleId == 5)
		{
			// 6th sample: the corner on the side of dKO.
			PixelOffset = KOSign;
			SamplePixelPos = ClampPixelOffset(PixelPos + iKOSign, PixelPosMin, PixelPosMax);
		}
		else
		#endif
		{
			const tsr_short2 iPixelOffset = tsr_short2(kOffsets3x3[kPlusIndexes3x3[SampleId]]);
			PixelOffset = dpv_interleave_registers(tsr_half2(iPixelOffset), tsr_half2(iPixelOffset));
			SamplePixelPos = PixelPos + ClampPixelOffset(
				PixelPos,
				dpv_interleave_registers(iPixelOffset, iPixelOffset),
				iPixelOffset,
				PixelPosMin, PixelPosMax);

			#if CONFIG_SAMPLES == CONFIG_SAMPLES_PLUS_DISABLE_FAR
			if (all(iPixelOffset == tsr_short2(+1, +0)))
			{
				bIsDisabledSample = dKO[0] < tsr_half(-0.25);
			}
			else if (all(iPixelOffset == tsr_short2(+0, +1)))
			{
				bIsDisabledSample = dKO[1] < tsr_half(-0.25);
			}
			else if (all(iPixelOffset == tsr_short2(-1, +0)))
			{
				bIsDisabledSample = dKO[0] > tsr_half(+0.25);
			}
			else if (all(iPixelOffset == tsr_short2(+0, -1)))
			{
				bIsDisabledSample = dKO[1] > tsr_half(+0.25);
			}
			#endif
		}
	}
	#elif CONFIG_SAMPLES == CONFIG_SAMPLES_1X1
	{
		SamplePixelPos = PixelPos;
		PixelOffset = 0;
	}
	#else
		#error Unknown sample count
	#endif
}

/**
 * Samples a history texture array at SampleUV (xy = UV, z = array slice) with the bilinear clamped sampler at mip 0.
 * The #else branch is a disabled manual 4-tap bilinear fetch.
 */
// NOTE(review): the template parameter list (e.g. <typename T>) and the Texture2DArray element type look like
// they were lost from this declaration - confirm against the upstream file.
template T BilinearSampleColorHistory(Texture2DArray Texture, float3 SampleUV)
#if 1
{
	return Texture.SampleLevel(GlobalBilinearClampedSampler, SampleUV, 0);
}
#else
{
	FBilinearSampleInfos BilinearInter = GetBilinearSampleLevelInfos(SampleUV.xy, PrevHistoryInfo_Extent, PrevHistoryInfo_ExtentInverse);

	T Return = 0.0;

	UNROLL_N(4)
	for (uint i = 0; i < 4; i++)
	{
		float BilinearWeight = GetSampleWeight(BilinearInter, i);
		uint2 PixelPos = GetSamplePixelCoord(BilinearInter, i);

		PixelPos = fastClamp(PixelPos, PrevHistoryInfo_ViewportMin, PrevHistoryInfo_ViewportMax - 1);

		T RawSample = Texture[uint3(PixelPos, SampleUV.z)];
		if (View.GeneralPurposeTweak == 1.0)
			RawSample.rgb *= tsr_half(HdrWeight4(RawSample.rgb));
		Return += RawSample *
tsr_half(BilinearWeight);
	}

	if (View.GeneralPurposeTweak == 1.0)
		Return.rgb *= tsr_half(HdrWeightInvY(Luma4(Return.rgb)));

	return Return;
}
#endif

/**
 * Sums a quarter of SceneColor with the value held by the thread at LocalGroupThreadIndex ^ XorButterFly.x,
 * then with the one at LocalGroupThreadIndex ^ XorButterFly.y.
 *
 * Each exchange uses a wave broadcast when the butterfly fits in a wave, and SharedArray0 / SharedArray1 otherwise.
 * With dual pixel vectorization, XorButterFly.y == 0x20 means the partner is the lane's own second pixel, so the
 * two halves are summed in registers instead.
 *
 * @param SceneColor             Color(s) of the pixel(s) handled by this lane.
 * @param LocalGroupThreadIndex  Index of the thread, used to address the shared arrays.
 * @param XorButterFly           XOR masks selecting the partner thread for the first (x) and second (y) exchange.
 * @return The accumulated color. NOTE(review): presumably a 2x2 box average when x / y select the horizontal and
 *         vertical neighbors - confirm with the caller.
 */
tsr_halfCx2 DownsampleSceneColor(tsr_halfCx2 SceneColor, uint LocalGroupThreadIndex, const uint2 XorButterFly)
{
	SceneColor = SceneColor * tsr_half(0.25);

	// Forces the * tsr_half(0.25) to be applied before to avoid turning bright pixels to +inf in the adds below.
	#if CONFIG_FP16_PRECISE_MULTIPLY_ORDER
		#if CONFIG_SCENE_COLOR_ALPHA
			const tsr_halfC ColorMax10BitsFloat = tsr_half(Max10BitsFloat * 0.25).xxxx;
		#else
			const tsr_halfC ColorMax10BitsFloat = tsr_half(Max10BitsFloat * 0.25).xxx;
		#endif
		SceneColor = dpv_min(SceneColor, ColorMax10BitsFloat);
	#endif

	// First exchange (XorButterFly.x).
	#if PLATFORM_SUPPORTS_WAVE_BROADCAST
	if (uint(XorButterFly.x * 2) <= WaveGetLaneCount())
	{
		FWaveBroadcastSettings Horizontal = InitWaveXorButterfly(XorButterFly.x);
		SceneColor += WaveBroadcast(Horizontal, SceneColor);
	}
	else
	#endif
	{
		// Publish this lane's value(s) to LDS.
		#if CONFIG_SCENE_COLOR_ALPHA
			SharedArray0[LocalGroupThreadIndex] = dpv_lo(SceneColor);
			#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
				SharedArray1[LocalGroupThreadIndex] = dpv_hi(SceneColor);
			#endif
		#else
			SharedArray0[LocalGroupThreadIndex] = tsr_half4(dpv_lo(SceneColor), 0.0);
			#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
				SharedArray1[LocalGroupThreadIndex] = tsr_half4(dpv_hi(SceneColor), 0.0);
			#endif
		#endif

		// NOTE(review): without wave broadcast the barrier is skipped for butterflies spanning at most 8 threads;
		// presumably this relies on those threads executing in lockstep - confirm.
		#if PLATFORM_SUPPORTS_WAVE_BROADCAST
			const bool bBarrier = XorButterFly.x * 0x2 > WaveGetLaneCount();
		#else
			const bool bBarrier = XorButterFly.x * 0x2 > 0x8;
		#endif

		if (bBarrier)
		{
			GroupMemoryBarrierWithGroupSync();
		}

		// The modulo keeps the index inside the shared arrays.
		uint OtherGroupThreadIndex = LocalGroupThreadIndex ^ (XorButterFly.x % GROUP_SHARED_SIZE);

		tsr_halfCx2 OtherSceneColor;
		#if CONFIG_SCENE_COLOR_ALPHA
			#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
				OtherSceneColor = dpv_interleave_registers(SharedArray0[OtherGroupThreadIndex], SharedArray1[OtherGroupThreadIndex]);
			#else
				OtherSceneColor = dpv_interleave_mono_registers(SharedArray0[OtherGroupThreadIndex]);
			#endif
		#else
			#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
				OtherSceneColor = dpv_interleave_registers(SharedArray0[OtherGroupThreadIndex].rgb, SharedArray1[OtherGroupThreadIndex].rgb);
			#else
				OtherSceneColor = dpv_interleave_mono_registers(SharedArray0[OtherGroupThreadIndex].rgb);
			#endif
		#endif

		// Second barrier so the shared arrays can be overwritten by the next exchange.
		if (bBarrier)
		{
			GroupMemoryBarrierWithGroupSync();
		}

		SceneColor += OtherSceneColor;
	}

	// Second exchange (XorButterFly.y).
	if (XorButterFly.y == 0x20 && CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION)
	{
		SceneColor = dpv_interleave_mono_registers(dpv_lo(SceneColor) + dpv_hi(SceneColor));
	}
	#if PLATFORM_SUPPORTS_WAVE_BROADCAST
	else if (uint(XorButterFly.y * 2) <= WaveGetLaneCount())
	{
		FWaveBroadcastSettings Vertical = InitWaveXorButterfly(XorButterFly.y);
		SceneColor += WaveBroadcast(Vertical, SceneColor);
	}
	#endif
	// The else lives outside of the #if so the LDS path stays mutually exclusive with the in-register sum above
	// when PLATFORM_SUPPORTS_WAVE_BROADCAST is off. Otherwise the lane would read back its own value
	// (0x20 % GROUP_SHARED_SIZE == 0 with dual pixel vectorization) and add it a second time.
	else
	{
		#if CONFIG_SCENE_COLOR_ALPHA
			SharedArray0[LocalGroupThreadIndex] = dpv_lo(SceneColor);
			#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
				SharedArray1[LocalGroupThreadIndex] = dpv_hi(SceneColor);
			#endif
		#else
			SharedArray0[LocalGroupThreadIndex] = tsr_half4(dpv_lo(SceneColor), 0.0);
			#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
				SharedArray1[LocalGroupThreadIndex] = tsr_half4(dpv_hi(SceneColor), 0.0);
			#endif
		#endif

		#if PLATFORM_SUPPORTS_WAVE_BROADCAST
			const bool bBarrier = XorButterFly.y * 0x2 > WaveGetLaneCount();
		#else
			const bool bBarrier = XorButterFly.y * 0x2 > 0x8;
		#endif

		if (bBarrier)
		{
			GroupMemoryBarrierWithGroupSync();
		}

		uint OtherGroupThreadIndex = LocalGroupThreadIndex ^ (XorButterFly.y % GROUP_SHARED_SIZE);

		tsr_halfCx2 OtherSceneColor;
		#if CONFIG_SCENE_COLOR_ALPHA
			#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
				OtherSceneColor = dpv_interleave_registers(SharedArray0[OtherGroupThreadIndex], SharedArray1[OtherGroupThreadIndex]);
			#else
				OtherSceneColor = dpv_interleave_mono_registers(SharedArray0[OtherGroupThreadIndex]);
			#endif
		#else
			#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
				OtherSceneColor = dpv_interleave_registers(SharedArray0[OtherGroupThreadIndex].rgb, SharedArray1[OtherGroupThreadIndex].rgb);
			#else
				OtherSceneColor = dpv_interleave_mono_registers(SharedArray0[OtherGroupThreadIndex].rgb);
			#endif
		#endif

		if (bBarrier)
		{
			GroupMemoryBarrierWithGroupSync();
		}

		SceneColor += OtherSceneColor;
	}

	return SceneColor;
}

#if CONFIG_LENS_DISTORTION

/**
 * Samples a displacement texture and crops any values beyond the texture's original bounds.
 *
 * @param DisplacementTexture  Displacement map to sample.
 * @param TexCoord             UV at which the displacement map is sampled.
 * @param ViewportUV           Viewport UV tested against the un-overscanned [0, 1] range.
 * @return The sampled displacement, or (0, 0) when ViewportUV scaled by DistortionOverscan around the
 *         viewport center falls outside of [0, 1].
 */
float2 SampleDisplacementTexture(Texture2D DisplacementTexture, float2 TexCoord, float2 ViewportUV)
{
	// Convert the destination viewport UV to the displacement map's original frustum, and compute whether this value is in or out of that frustum
	float2 UnoverscannedUV = (ViewportUV - float2(0.5, 0.5)) * DistortionOverscan + float2(0.5, 0.5);
	bool bCropPixel = UnoverscannedUV.x < 0.0 || UnoverscannedUV.x > 1.0 || UnoverscannedUV.y < 0.0 || UnoverscannedUV.y > 1.0;

	return select(bCropPixel, float2(0.0, 0.0), DisplacementTexture.SampleLevel(GlobalBilinearClampedSampler, TexCoord, 0));
}

/**
 * Offsets DistortedScreenPos by the displacement sampled at its own viewport UV.
 *
 * @param DisplacementTexture  Displacement map to sample.
 * @param DistortedScreenPos   Screen position to displace; also where the displacement map is sampled.
 * @param ViewportScreenPos    Screen position used for the overscan crop test of SampleDisplacementTexture().
 * @return The displaced screen position.
 */
float2 ApplyDisplacementTextureOnScreenPos(Texture2D DisplacementTexture, float2 DistortedScreenPos, float2 ViewportScreenPos)
{
	float2 DistortedUV = ScreenPosToViewportUV(DistortedScreenPos);
	float2 ViewportUV = ScreenPosToViewportUV(ViewportScreenPos);
	float2 UndistortedUV = DistortedUV + SampleDisplacementTexture(DisplacementTexture, DistortedUV, ViewportUV);
	return ViewportUVToScreenPos(UndistortedUV);
}

#endif

#if CONFIG_REPROJECTION_FIELD

/**
 * Fetches the encoded reprojection boundary of each pixel handled by the lane.
 *
 * @param InputPixelPos                Input pixel position(s) to fetch ReprojectionBoundaryTexture at.
 * @param EncodedReprojectionBoundary  (out) Raw texel per pixel, or 0 when bReprojectionField is off.
 * @param bApplyReprojectionBoundary   (out) True when any fetched texel differs from the encoding of a zero
 *                                     offset with kFullDilateBoundary; false when bReprojectionField is off.
 */
void FetchVelocityDilateBoundary(
	tsr_short2x2 InputPixelPos,
	out uint EncodedReprojectionBoundary[DPV_PIXEL_PER_LANE],
	out bool bApplyReprojectionBoundary)
{
	BRANCH
	if (bReprojectionField)
	PLATFORM_SPECIFIC_ISOLATE
	{
		UNROLL_N(DPV_PIXEL_PER_LANE)
		for (uint PixelId = 0; PixelId < DPV_PIXEL_PER_LANE; PixelId++)
		{
			tsr_ushort2 LocalInputPixelPos = dpv_access_pixel(InputPixelPos, PixelId);
			EncodedReprojectionBoundary[PixelId] = ReprojectionBoundaryTexture[LocalInputPixelPos];
		}

		// Reference encoding of "nothing to apply": zero offset with the full dilate boundary.
		const uint EncodedReprojectionNoBoundary = EncodeReprojectionBoundary(/* ReprojectionOffset = */ tsr_short2(0, 0), /* ReprojectionBoundary = */ kFullDilateBoundary);
bApplyReprojectionBoundary = false; UNROLL_N(DPV_PIXEL_PER_LANE) for (uint PixelId = 0; PixelId < DPV_PIXEL_PER_LANE; PixelId++) { bApplyReprojectionBoundary |= EncodedReprojectionBoundary[PixelId] != EncodedReprojectionNoBoundary; } } else { UNROLL_N(DPV_PIXEL_PER_LANE) for (uint PixelId = 0; PixelId < DPV_PIXEL_PER_LANE; PixelId++) { EncodedReprojectionBoundary[PixelId] = 0; } bApplyReprojectionBoundary = false; } } // FetchVelocityDilateBoundary #endif // CONFIG_REPROJECTION_FIELD //------------------------------------------------------- ENTRY POINT #if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION [numthreads(TILE_SIZE * TILE_SIZE / 2, 1, 1)] #else [numthreads(TILE_SIZE * TILE_SIZE, 1, 1)] #endif void MainCS( uint2 GroupId : SV_GroupID, uint GroupThreadIndex : SV_GroupIndex) { #if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION uint GroupWaveIndex = GetGroupWaveIndex(GroupThreadIndex, TILE_SIZE * TILE_SIZE / 2); #else uint GroupWaveIndex = GetGroupWaveIndex(GroupThreadIndex, TILE_SIZE * TILE_SIZE); #endif #if DEBUG_OUTPUT float4x2 Debug[DEBUG_ARRAY_SIZE]; for (uint DebugId = 0; DebugId < DEBUG_ARRAY_SIZE; DebugId++) { Debug[DebugId] = 0.0; } #endif tsr_short2x2 HistoryPixelPos = dpv_add( tsr_short2(GroupId) * tsr_short2(TILE_SIZE, TILE_SIZE), tsr_short2x2(Map8x8Tile2x2LaneDPV(GroupThreadIndex))); float2x2 ScreenPos; float2x2 InputPPCo; #if CONFIG_LENS_DISTORTION BRANCH if (bLensDistortion) { float2x2 DestViewportUV = ApplyScreenTransform(float2x2(HistoryPixelPos), HistoryPixelPosToViewportUV); float2x2 SrcViewportUV; UNROLL_N(DPV_PIXEL_PER_LANE) for (uint PixelId = 0; PixelId < DPV_PIXEL_PER_LANE; PixelId++) { float2 LocalDestViewportUV = dpv_access_pixel(DestViewportUV, PixelId); float2 LocalSrcViewportUV = LocalDestViewportUV + SampleDisplacementTexture(UndistortingDisplacementTexture, LocalDestViewportUV, LocalDestViewportUV); SrcViewportUV[0][PixelId] = LocalSrcViewportUV.x; SrcViewportUV[1][PixelId] = LocalSrcViewportUV.y; } ScreenPos[0][0] = 
ViewportUVToScreenPos(dpv_access_pixel(SrcViewportUV, 0)).x; ScreenPos[1][0] = ViewportUVToScreenPos(dpv_access_pixel(SrcViewportUV, 0)).y; ScreenPos[0][1] = ViewportUVToScreenPos(dpv_access_pixel(SrcViewportUV, 1)).x; ScreenPos[1][1] = ViewportUVToScreenPos(dpv_access_pixel(SrcViewportUV, 1)).y; // Pixel coordinate of the center of output pixel O in the input viewport. InputPPCo = ApplyScreenTransform(SrcViewportUV, ViewportUVToInputPPCo); } else #endif { ScreenPos = ApplyScreenTransform(float2x2(HistoryPixelPos), HistoryPixelPosToScreenPos); // Pixel coordinate of the center of output pixel O in the input viewport. InputPPCo = ApplyScreenTransform(float2x2(HistoryPixelPos), HistoryPixelPosToInputPPCo); } // Pixel coordinate of the center of the nearest input pixel K in the input viewport. float2x2 InputPPCk = floor(InputPPCo) + 0.5; tsr_short2x2 InputPixelPos = ClampPixelOffset( tsr_short2x2(InputPPCo), InputPixelPosMin, InputPixelPosMax); // Fetch the reprojection anti-aliasing. #if CONFIG_REPROJECTION_FIELD uint EncodedReprojectionBoundary[DPV_PIXEL_PER_LANE]; bool bApplyReprojectionBoundary; FetchVelocityDilateBoundary(InputPixelPos, /* out */ EncodedReprojectionBoundary, /* out */ bApplyReprojectionBoundary); #endif // CONFIG_REPROJECTION_FIELD // Fetch reprojection-related information. 
float2x2 PrevScreenPos = ScreenPos; tsr_half2 LowFrequencyRejection = tsr_half(1.0).xx; tsr_half2 DisableHistoryClamp = tsr_half(1.0).xx; tsr_half2 DecreaseValidityMultiplier = tsr_half(0.0).xx; tsr_half2 OutputPixelVelocity = tsr_half(0.0).xx; tsr_half2 NoiseFiltering = tsr_half(0.0).xx; tsr_half2 ReprojectionUpscaleCorrection = tsr_half(1.0).xx; bool2 bIsParallaxRejected = false; #if CONFIG_HISTORY_RESURRECTION bool2 bIsHistoryResurrection = false; #else const bool2 bIsHistoryResurrection = false; #endif #if CONFIG_REJECTION_ANTI_ALIASING tsr_half2x2 SpatialAntiAliasingOffset = dpv_interleave_mono_registers(tsr_half(0.0).xx); #endif PLATFORM_SPECIFIC_ISOLATE { uint RawEncodedReprojectionVector[DPV_PIXEL_PER_LANE]; tsr_half4 RawHistoryRejection[DPV_PIXEL_PER_LANE]; #if CONFIG_REJECTION_ANTI_ALIASING tsr_ushort2 RawEncodedInputTexelOffset[DPV_PIXEL_PER_LANE]; #endif #if CONFIG_REPROJECTION_FIELD uint RawEncodedReprojectionJacobian[DPV_PIXEL_PER_LANE]; tsr_half2 JacobianCoordinate[DPV_PIXEL_PER_LANE]; tsr_half4 RawDilatedHistoryRejection[DPV_PIXEL_PER_LANE]; #endif // Issue overlapped texture fetches PLATFORM_SPECIFIC_ISOLATE { tsr_half2x2 dInputKO = tsr_half2x2(InputPPCo - InputPPCk); UNROLL_N(DPV_PIXEL_PER_LANE) for (uint PixelId = 0; PixelId < DPV_PIXEL_PER_LANE; PixelId++) { tsr_ushort2 LocalInputPixelPos = dpv_access_pixel(InputPixelPos, PixelId); #if CONFIG_REPROJECTION_FIELD tsr_short2 BoundaryDilateOffset; BRANCH if (bApplyReprojectionBoundary) { tsr_short2 ReprojectionOffset = DecodeReprojectionOffset(EncodedReprojectionBoundary[PixelId]); tsr_half2 ReprojectionBoundary = DecodeReprojectionBoundary(EncodedReprojectionBoundary[PixelId]); bool bHistoryPixelWithinOffsetBoundary = IsHistoryPixelWithinOffsetBoundary( dpv_access_pixel(dInputKO, PixelId), ReprojectionBoundary, 1.0 / InputToHistoryFactor); BoundaryDilateOffset = select(bHistoryPixelWithinOffsetBoundary, ReprojectionOffset, -ReprojectionOffset); //Debug[0][0][PixelId] = 
bHistoryPixelWithinOffsetBoundary && any(ReprojectionOffset != tsr_short(0)); //Debug[0][1][PixelId] = ((LocalInputPixelPos.x ^ LocalInputPixelPos.y) & 0x1) == 0; } else { BoundaryDilateOffset = tsr_short2(0, 0); } JacobianCoordinate[PixelId] = dpv_access_pixel(dInputKO, PixelId) - tsr_half2(BoundaryDilateOffset); tsr_ushort2 LocalInputPixelPosWithReprojectionAA = LocalInputPixelPos + BoundaryDilateOffset; #else tsr_ushort2 LocalInputPixelPosWithReprojectionAA = LocalInputPixelPos; #endif RawEncodedReprojectionVector[PixelId] = ReprojectionVectorTexture[LocalInputPixelPosWithReprojectionAA]; #if CONFIG_REPROJECTION_FIELD RawEncodedReprojectionJacobian[PixelId] = ReprojectionJacobianTexture[LocalInputPixelPosWithReprojectionAA]; RawDilatedHistoryRejection[PixelId] = HistoryRejectionTexture[LocalInputPixelPosWithReprojectionAA]; #endif RawHistoryRejection[PixelId] = HistoryRejectionTexture[LocalInputPixelPos]; #if CONFIG_REJECTION_ANTI_ALIASING { RawEncodedInputTexelOffset[PixelId] = AntiAliasingTexture[LocalInputPixelPos]; } #endif } } // Process texture fetches. 
PLATFORM_SPECIFIC_ISOLATE { // Process velocity { float2x2 ReprojectionVector; for (uint PixelId = 0; PixelId < DPV_PIXEL_PER_LANE; PixelId++) { #if CONFIG_REPROJECTION_FIELD float2 ReprojectionScreenPosCorrection; BRANCH if (bReprojectionField) { uint EncodedReprojectionJacobian = RawEncodedReprojectionJacobian[PixelId]; tsr_half2x2 ReprojectionJacobian = DecodeReprojectionJacobian(EncodedReprojectionJacobian); float2 ReprojectionPixelPosCorrection = float2(mul(JacobianCoordinate[PixelId], ReprojectionJacobian)); //ReprojectionScreenPosCorrection = ReprojectionPixelPosCorrection * (float2(2, -2) * InputInfo_ViewportSizeInverse); ReprojectionScreenPosCorrection = ReprojectionPixelPosCorrection * InputPixelPosToScreenPos.xy; ReprojectionUpscaleCorrection[PixelId] = rcp(max(ComputeReprojectionUpscaleFactorFromJacobian(ReprojectionJacobian), 1.0)); } else { ReprojectionUpscaleCorrection[PixelId] = tsr_half(1.0); ReprojectionScreenPosCorrection = float(0.0); } #else // !CONFIG_REPROJECTION_FIELD const float2 ReprojectionScreenPosCorrection = 0.0; #endif uint EncodedReprojectionVector = RawEncodedReprojectionVector[PixelId]; float2 LocalReprojectionVector = DecodeReprojectionVector(EncodedReprojectionVector) + float2(ReprojectionScreenPosCorrection); ReprojectionVector[0][PixelId] = LocalReprojectionVector[0]; ReprojectionVector[1][PixelId] = LocalReprojectionVector[1]; } PrevScreenPos = ScreenPos - ReprojectionVector; OutputPixelVelocity = tsr_half2(dpv_length(dpv_mul(ReprojectionVector, HistoryInfo_ViewportSize))); } // Unpacks bits masks { #if CONFIG_REPROJECTION_FIELD tsr_ushort2 BitMask = tsr_ushort2(round(dpv_interleave_registers_array(RawDilatedHistoryRejection)[3] * tsr_half(255.0))); #else tsr_ushort2 BitMask = tsr_ushort2(round(dpv_interleave_registers_array(RawHistoryRejection)[3] * tsr_half(255.0))); #endif bIsParallaxRejected = (BitMask & tsr_ushort(0x1)) == tsr_ushort(0); #if CONFIG_HISTORY_RESURRECTION bIsHistoryResurrection = (BitMask & tsr_ushort(0x2)) 
!= tsr_ushort(0); #endif } // Process input texel, forcing to pack register. LowFrequencyRejection = dpv_force_interleave_registers_array(RawHistoryRejection)[0]; DisableHistoryClamp = dpv_force_interleave_registers_array(RawHistoryRejection)[1]; DecreaseValidityMultiplier = dpv_force_interleave_registers_array(RawHistoryRejection)[2]; #if CONFIG_REPROJECTION_FIELD { LowFrequencyRejection = min(LowFrequencyRejection , dpv_force_interleave_registers_array(RawDilatedHistoryRejection)[0]); DisableHistoryClamp = min(DisableHistoryClamp , dpv_force_interleave_registers_array(RawDilatedHistoryRejection)[1]); DecreaseValidityMultiplier = max(DecreaseValidityMultiplier, dpv_force_interleave_registers_array(RawDilatedHistoryRejection)[2]); } #endif } #if CONFIG_REJECTION_ANTI_ALIASING { tsr_ushort2 EncodedInputTexelOffset = dpv_force_interleave_registers_array(RawEncodedInputTexelOffset)[0]; NoiseFiltering = tsr_half2(dpv_force_interleave_registers_array(RawEncodedInputTexelOffset)[1]) * rcp(tsr_half(255.0)); SpatialAntiAliasingOffset = DecodeSpatialAntiAliasingOffset(EncodedInputTexelOffset); } #endif } // Apply lens distortion on the reprojection. 
#if CONFIG_LENS_DISTORTION BRANCH if (bLensDistortion) { float2x2 UndistortedScreenPos = ApplyScreenTransform(float2x2(HistoryPixelPos), HistoryPixelPosToScreenPos); UNROLL_N(DPV_PIXEL_PER_LANE) for (uint PixelId = 0; PixelId < DPV_PIXEL_PER_LANE; PixelId++) { float2 LocalUndistortedScreenPos = dpv_access_pixel(UndistortedScreenPos, PixelId); float2 LocalPrevScreenPos = dpv_access_pixel(PrevScreenPos, PixelId); float2 LocalSrcScreenPos = ApplyDisplacementTextureOnScreenPos(PrevDistortingDisplacementTexture, LocalPrevScreenPos, LocalUndistortedScreenPos); //LocalSrcScreenPos = lerp(LocalUndistortedScreenPos, LocalSrcScreenPos, saturate(OutputPixelVelocity[PixelId] * tsr_half(8.0))); #if CONFIG_HISTORY_RESURRECTION float2 LocalSrcResurrectedScreenPos = ApplyDisplacementTextureOnScreenPos(ResurrectedDistortingDisplacementTexture, LocalPrevScreenPos, LocalUndistortedScreenPos); LocalSrcScreenPos = select(bIsHistoryResurrection[PixelId], LocalSrcResurrectedScreenPos, LocalSrcScreenPos); #endif PrevScreenPos[0][PixelId] = LocalSrcScreenPos.x; PrevScreenPos[1][PixelId] = LocalSrcScreenPos.y; } } #endif // Detect whether the history reprojection is successful bool2 bIsOffScreen; bool2 bIsDisoccluded; float Overscan = 1.0f; #if CONFIG_LENS_DISTORTION Overscan = DistortionOverscan; #endif // When there is a distortion overscan, this causes a hard edge where the distortion map ends at its original frustum. Consider // this edge as off screen (which it will be after being cropped at the end of the post process chain) to prevent ghosting from the edge IsOffScreenOrDisoccluded( bCameraCut, PrevScreenPos * Overscan, bIsParallaxRejected, /* out */ bIsOffScreen, /* out */ bIsDisoccluded); // Final post processing. 
#if !CONFIG_CLAMP { bIsDisoccluded = false; #if CONFIG_HISTORY_RESURRECTION bIsHistoryResurrection = false; #endif LowFrequencyRejection = tsr_half(1.0).xx; LowFrequencyClamp = tsr_half(1.0).xx; } #endif // Reproject history tsr_halfCx2 PrevHighFrequencyColor; tsr_half2 PrevHistoryValidity; #if CONFIG_SELECTIVE_REPROJECTION BRANCH if (all(or(LowFrequencyRejection <= tsr_half(0.0).xx, bIsOffScreen))) { PrevHighFrequencyColor = tsr_half(0.0); PrevHistoryValidity = tsr_half(0.0); } else #endif PLATFORM_SPECIFIC_ISOLATE { tsr_halfC RawHighFrequency[BICUBIC_CATMULL_ROM_SAMPLES][DPV_PIXEL_PER_LANE]; tsr_halfM RawMetadata[BICUBIC_CATMULL_ROM_SAMPLES][DPV_PIXEL_PER_LANE]; tsr_half2 RawKernelWeight[BICUBIC_CATMULL_ROM_SAMPLES]; // Issues texture fetches. PLATFORM_SPECIFIC_ISOLATE { float2x2 PrevHistoryBufferUV = ApplyScreenTransform(PrevScreenPos, ScreenPosToPrevHistoryBufferUV); PrevHistoryBufferUV[0] = fastClamp(PrevHistoryBufferUV[0], PrevHistoryInfo_UVViewportBilinearMin[0], PrevHistoryInfo_UVViewportBilinearMax[0]); PrevHistoryBufferUV[1] = fastClamp(PrevHistoryBufferUV[1], PrevHistoryInfo_UVViewportBilinearMin[1], PrevHistoryInfo_UVViewportBilinearMax[1]); FCatmullRomSamples Samples0 = GetBicubic2DCatmullRomSamples_Stubbe( dpv_lo(PrevHistoryBufferUV), PrevHistoryInfo_Extent, PrevHistoryInfo_ExtentInverse); FCatmullRomSamples Samples1 = GetBicubic2DCatmullRomSamples_Stubbe( dpv_hi(PrevHistoryBufferUV), PrevHistoryInfo_Extent, PrevHistoryInfo_ExtentInverse); UNROLL_N(BICUBIC_CATMULL_ROM_SAMPLES) for (uint i = 0; i < BICUBIC_CATMULL_ROM_SAMPLES; i++) { float2 SampleUV0 = fastClamp(Samples0.UV[i], PrevHistoryInfo_UVViewportBilinearMin, PrevHistoryInfo_UVViewportBilinearMax); float2 SampleUV1 = fastClamp(Samples1.UV[i], PrevHistoryInfo_UVViewportBilinearMin, PrevHistoryInfo_UVViewportBilinearMax); tsr_half2 KernelWeight = dpv_interleave_registers(tsr_half(Samples0.Weight[i]), tsr_half(Samples1.Weight[i])); RawKernelWeight[i] = KernelWeight; UNROLL_N(2) for (uint PixelId = 
0; PixelId < DPV_PIXEL_PER_LANE; PixelId++) { float2 SampleUV = PixelId == 0 ? SampleUV0 : SampleUV1; #if CONFIG_HISTORY_RESURRECTION float FrameIndex = select(bIsHistoryResurrection[PixelId], ResurrectionFrameIndex, PrevFrameIndex); #else float FrameIndex = PrevFrameIndex; #endif RawHighFrequency[i][PixelId] = BilinearSampleColorHistory(PrevHistoryColorTexture, float3(SampleUV, FrameIndex)); RawMetadata[i][PixelId] = PrevHistoryMetadataTexture.SampleLevel(GlobalBilinearClampedSampler, float3(SampleUV, FrameIndex), 0); } } // for (uint i = 0; i < BICUBIC_CATMULL_ROM_SAMPLES; i++) } #if CONFIG_HISTORY_RESURRECTION const tsr_half2 PreExposureCorrection = select( bIsHistoryResurrection, tsr_half(ResurrectionPreExposureCorrection), tsr_half(HistoryPreExposureCorrection)); #else const tsr_half2 PreExposureCorrection = tsr_half(HistoryPreExposureCorrection); #endif // Process history texture fetches. tsr_halfCx2 AccumulateHighFrequency = tsr_half(0); tsr_halfMx2 AccumulateMetadata = tsr_half(0); PLATFORM_SPECIFIC_ISOLATE { UNROLL_N(BICUBIC_CATMULL_ROM_SAMPLES) for (uint i = 0; i < BICUBIC_CATMULL_ROM_SAMPLES; i++) { // TODO: Should use dpv_force_interleave_registers_array() but there is a shader compiler bug tsr_halfCx2 SampleHighFrequency = dpv_interleave_registers_array(RawHighFrequency[i]); tsr_halfMx2 SampleMetadata = dpv_interleave_registers_array(RawMetadata[i]); tsr_half2 KernelWeight = RawKernelWeight[i]; AccumulateHighFrequency[0] += SampleHighFrequency[0] * (KernelWeight * PreExposureCorrection); AccumulateHighFrequency[1] += SampleHighFrequency[1] * (KernelWeight * PreExposureCorrection); AccumulateHighFrequency[2] += SampleHighFrequency[2] * (KernelWeight * PreExposureCorrection); #if CONFIG_SCENE_COLOR_ALPHA AccumulateHighFrequency[3] += SampleHighFrequency[3] * KernelWeight; #endif AccumulateMetadata += dpv_scale(SampleMetadata, KernelWeight); } } // Corrects history. 
{ // Super bight highlights have a tendy to generate negatives bool2 bHasAnyNegativeOrNaN = or(AccumulateHighFrequency[0] < 0.0, or(AccumulateHighFrequency[1] < 0.0, AccumulateHighFrequency[2] < 0.0)); #if CONFIG_SCENE_COLOR_ALPHA bHasAnyNegativeOrNaN = or(bHasAnyNegativeOrNaN, AccumulateHighFrequency[3] < 0.0); #endif BRANCH if (any(bHasAnyNegativeOrNaN)) { AccumulateHighFrequency[0] = select(bHasAnyNegativeOrNaN, dpv_interleave_registers_array(RawHighFrequency[2])[0] * PreExposureCorrection, AccumulateHighFrequency[0]); AccumulateHighFrequency[1] = select(bHasAnyNegativeOrNaN, dpv_interleave_registers_array(RawHighFrequency[2])[1] * PreExposureCorrection, AccumulateHighFrequency[1]); AccumulateHighFrequency[2] = select(bHasAnyNegativeOrNaN, dpv_interleave_registers_array(RawHighFrequency[2])[2] * PreExposureCorrection, AccumulateHighFrequency[2]); #if CONFIG_SCENE_COLOR_ALPHA AccumulateHighFrequency[3] = select(bHasAnyNegativeOrNaN, dpv_interleave_registers_array(RawHighFrequency[2])[3], AccumulateHighFrequency[3]); #endif #if CONFIG_SCENE_COLOR_ALPHA AccumulateHighFrequency = -dpv_min(-AccumulateHighFrequency, tsr_half(0.0).xxxx); #else AccumulateHighFrequency = -dpv_min(-AccumulateHighFrequency, tsr_half(0.0).xxx); #endif } AccumulateMetadata = -dpv_min(-AccumulateMetadata, tsr_half(0.0).xx); } // Unpack history. { PrevHighFrequencyColor = AccumulateHighFrequency; #if CONFIG_METADATA_CHANNELS == 1 PrevHistoryValidity = AccumulateMetadata; #else PrevHistoryValidity = AccumulateMetadata[0]; #endif } } // TODO: CONFIG_MANUAL_LDS_SPILL // Filter input scene color at predictor frequency. 
tsr_halfCx2 FilteredInputColor; tsr_halfCx2 InputMinColor; tsr_halfCx2 InputMaxColor; tsr_half2 InputPixelAlignement; tsr_half2 BlendClamp; tsr_half2 PrevWeight; tsr_half2 CurrentWeight; PLATFORM_SPECIFIC_ISOLATE { tsr_halfC RawInputColorArray[CONFIG_SAMPLES_COUNT][DPV_PIXEL_PER_LANE]; tsr_half2x2 RawdPPArray[CONFIG_SAMPLES_COUNT]; tsr_half2 RawSampleSpatialWeightArray[CONFIG_SAMPLES_COUNT]; bool2 bIsDisabledSampleArray[CONFIG_SAMPLES_COUNT]; // Issues overlapped texture fetches PLATFORM_SPECIFIC_ISOLATE { // Detect if HistoryBufferUV would be outside of the viewport. tsr_half2 SpatialAntiAliasingLerp = select(or(bIsOffScreen, and(bIsDisoccluded, !bIsHistoryResurrection)), tsr_half(1.0).xx, saturate(tsr_half(1.0) - LowFrequencyRejection * tsr_half(4.0))); #if CONFIG_REJECTION_ANTI_ALIASING { InputPPCo += dpv_scale(SpatialAntiAliasingOffset, SpatialAntiAliasingLerp); InputPPCk = floor(InputPPCo) + 0.5; InputPixelPos = ClampPixelOffset( tsr_short2x2(InputPPCo), InputPixelPosMin, InputPixelPosMax); } #endif // Vector in pixel between pixel K -> O. tsr_half2x2 dInputKO = tsr_half2x2(InputPPCo - InputPPCk); // Compute upscaling kernel size based of the rejections and number of samples already in history. 
tsr_half2 KernelInputToHistoryFactor; { const tsr_half ActualHistorySampleCount = tsr_half(HistorySampleCount); const tsr_half ActualHistoryHisteresis = tsr_half(HistoryHisteresis); tsr_half2 MinRejectionBlendFactor = RejectionFactorToBlendFactor(min(LowFrequencyRejection, ReprojectionUpscaleCorrection)); MinRejectionBlendFactor = select(bIsOffScreen, tsr_half(1.0).xx, MinRejectionBlendFactor); tsr_half2 CoarseInputPixelAlignement = ComputeSampleWeigth(tsr_half(1.0), dInputKO, /* MinimalContribution = */ float(0.0)); tsr_half2 IdealInputPixelAlignement = ComputeSampleWeigth(tsr_half(InputToHistoryFactor), dInputKO, /* MinimalContribution = */ float(0.0)); tsr_half2 CoarseCurrentContribution = CoarseInputPixelAlignement * tsr_half(ActualHistoryHisteresis); tsr_half2 IdealCurrentContribution = IdealInputPixelAlignement * tsr_half(ActualHistoryHisteresis); // Reduces the number of pixels when rejecting history tsr_half2 ClampedPrevHistoryValidity = PrevHistoryValidity; #if 1 { tsr_half2 MaxValidity = tsr_half(1.0) - tsr_half(WeightClampingRejection) * DecreaseValidityMultiplier; ClampedPrevHistoryValidity = min(ClampedPrevHistoryValidity, MaxValidity); } #endif const tsr_half MaxWeight = tsr_half(1.0); // Compute the histeresis if was refining the history tsr_half2 RefiningHisteresis = select( ClampedPrevHistoryValidity + IdealCurrentContribution > tsr_half(0.0), IdealCurrentContribution * rcp(ClampedPrevHistoryValidity + IdealCurrentContribution), tsr_half(1.0)); // Compute the prev weight with coarse pixel alignment when rejecting the history. tsr_half2 CoarseRejectedPrevWeight = min( CoarseCurrentContribution * ComputePrevWeightMultiplier(MinRejectionBlendFactor), MaxWeight); // Compute the prev weight with coarse pixel alignment when refining. 
tsr_half2 CoarseRefiningPrevWeight = min( CoarseCurrentContribution * ComputePrevWeightMultiplier(RefiningHisteresis), MaxWeight); // Do not refine when rejected previous weight is lower than refining with coarse pixel alignment. // bIsNotRefining = CoarseRejectedPrevWeight < CoarseRefiningPrevWeight tsr_half2 bIsRefining = select(CoarseRejectedPrevWeight < CoarseRefiningPrevWeight, tsr_half(0.0), tsr_half(1.0)); bIsRefining = min(bIsRefining, saturate(ClampedPrevHistoryValidity * ActualHistorySampleCount)); // Soften the spatial kernel when there is high noise in the input to make parallax disocclusion of detailed geometry less distracting. tsr_half2 OffScreenInputToHistoryFactor = tsr_half(1.0) - tsr_half(0.5) * NoiseFiltering; tsr_half2 KernelInputToHistoryLerp = select(or(bIsOffScreen, and(bIsDisoccluded, !bIsHistoryResurrection)), tsr_half(0.0).xx, saturate(LowFrequencyRejection * tsr_half(16.0) - tsr_half(13.0)) * bIsRefining); KernelInputToHistoryFactor = lerp(OffScreenInputToHistoryFactor, tsr_half(InputToHistoryFactor), KernelInputToHistoryLerp); tsr_half2 KernelInputToHistoryAlignmentFactor = lerp(tsr_half(1.0).xx, tsr_half(InputToHistoryFactor), KernelInputToHistoryLerp); InputPixelAlignement = ComputeSampleWeigth(KernelInputToHistoryAlignmentFactor, dInputKO, /* MinimalContribution = */ float(0.0)); CurrentWeight = InputPixelAlignement * tsr_half(ActualHistoryHisteresis); PrevWeight = min(select(CurrentWeight > tsr_half(0.0), CurrentWeight, CoarseCurrentContribution) * ComputePrevWeightMultiplier(MinRejectionBlendFactor), ClampedPrevHistoryValidity); PrevWeight = min(PrevWeight, MaxWeight - CurrentWeight); } UNROLL_N(CONFIG_SAMPLES_COUNT) for (uint SampleId = 0; SampleId < CONFIG_SAMPLES_COUNT; SampleId++) { tsr_short2x2 InputSamplePixelPos; tsr_half2x2 InputPixelOffset; bool2 bIsDuplicatedSample; bool2 bIsDisabledSample; ComputeInputKernelSamplePosition( InputPixelPos, dInputKO, SampleId, InputPixelPosMin, InputPixelPosMax, /* out */ 
InputSamplePixelPos, /* out */ InputPixelOffset, /* out */ bIsDuplicatedSample, /* out */ bIsDisabledSample); tsr_half2x2 dPP = InputPixelOffset - dInputKO; tsr_half2 SampleSpatialWeight = ComputeSampleWeigth(KernelInputToHistoryFactor, dPP, /* MinimalContribution = */ float(0.005)); SampleSpatialWeight = select(bIsDuplicatedSample, tsr_half(0.0), SampleSpatialWeight); bIsDisabledSampleArray[SampleId] = bIsDisabledSample; RawdPPArray[SampleId] = dPP; RawSampleSpatialWeightArray[SampleId] = SampleSpatialWeight; UNROLL_N(DPV_PIXEL_PER_LANE) for (uint PixelId = 0; PixelId < DPV_PIXEL_PER_LANE; PixelId++) { RawInputColorArray[SampleId][PixelId] = InputSceneColorTexture[dpv_access_pixel(InputSamplePixelPos, PixelId)]; } } // for (uint SampleId = 0; SampleId < CONFIG_SAMPLES_COUNT; SampleId++) } tsr_halfCx2 InputColorCenter = dpv_force_interleave_registers_array(RawInputColorArray[0]); tsr_half2 FilteredInputColorWeight = tsr_half(0.0); FilteredInputColor = tsr_half(0.0); InputMinColor = InputColorCenter; InputMaxColor = InputColorCenter; UNROLL_N(CONFIG_SAMPLES_COUNT) for (uint SampleId = 0; SampleId < CONFIG_SAMPLES_COUNT; SampleId++) { bool2 bIsDisabledSample = bIsDisabledSampleArray[SampleId]; tsr_half2 SampleSpatialWeight = RawSampleSpatialWeightArray[SampleId]; tsr_halfCx2 InputColor = dpv_force_interleave_registers_array(RawInputColorArray[SampleId]); tsr_half2 ToneWeight = HdrWeight4(InputColor); FilteredInputColor += dpv_scale((SampleSpatialWeight * ToneWeight), InputColor); FilteredInputColorWeight += (SampleSpatialWeight * ToneWeight); if (SampleId != 0) { tsr_halfCx2 ClampBoxSample; ClampBoxSample[0] = select(bIsDisabledSample, InputColorCenter[0], InputColor[0]); ClampBoxSample[1] = select(bIsDisabledSample, InputColorCenter[1], InputColor[1]); ClampBoxSample[2] = select(bIsDisabledSample, InputColorCenter[2], InputColor[2]); #if CONFIG_SCENE_COLOR_ALPHA ClampBoxSample[3] = select(bIsDisabledSample, InputColorCenter[3], InputColor[3]); #endif InputMinColor = 
min(InputMinColor, ClampBoxSample); InputMaxColor = max(InputMaxColor, ClampBoxSample); } } // // for (uint SampleId = 0; SampleId < CONFIG_SAMPLES_COUNT; SampleId++) FilteredInputColor = dpv_scale(FilteredInputColor, rcp(FilteredInputColorWeight)); #if CONFIG_SCENE_COLOR_OVERFLOW { #if CONFIG_SCENE_COLOR_ALPHA FilteredInputColor = min(FilteredInputColor, dpv_interleave_mono_registers(LargestSceneColorRGBA)); #else FilteredInputColor = min(FilteredInputColor, dpv_interleave_mono_registers(LargestSceneColorRGB)); #endif } #endif } // Contribute current frame input into the predictor for next frame. tsr_halfCx2 FinalHighFrequencyColor; tsr_half2 FinalHistoryValidity; { tsr_halfCx2 BlendedPrevHighFrequencyColor; { tsr_halfCx2 ClampedPrevHighFrequencyColor = fastClamp(PrevHighFrequencyColor, InputMinColor, InputMaxColor); tsr_half2x2 Weight = WeightedLerpFactors(HdrWeight4(ClampedPrevHighFrequencyColor), HdrWeight4(PrevHighFrequencyColor), DisableHistoryClamp); BlendedPrevHighFrequencyColor = dpv_scale(ClampedPrevHighFrequencyColor, Weight[0]) + dpv_scale(PrevHighFrequencyColor, Weight[1]); } // Clamp the validity due to motion to maintain better sharpness in history reprojection under motion. #if 1 { tsr_half2 MaxValidity = tsr_half(1.0) - tsr_half(WeightClampingPixelSpeedAmplitude) * saturate(OutputPixelVelocity * tsr_half(InvWeightClampingPixelSpeed)); // Clamp up the max validity to favor stability under motion on high contrast edges. 
#if 1 { tsr_half2 PrevHistoryLuma = Luma4(BlendedPrevHighFrequencyColor); tsr_half2 FilteredLuma = Luma4(FilteredInputColor); tsr_half2 MinValidityForStability = abs(FilteredLuma - PrevHistoryLuma) / max(FilteredLuma, PrevHistoryLuma); MaxValidity = max(MaxValidity, MinValidityForStability); } #endif PrevWeight = min(PrevWeight, MaxValidity); } #endif tsr_half2 OutputValidity = CurrentWeight + PrevWeight; tsr_half2 PrevHistoryToneWeight = HdrWeightY(Luma4(BlendedPrevHighFrequencyColor)); tsr_half2 FilteredInputToneWeight = HdrWeight4(FilteredInputColor); tsr_half2 BlendPrevHistory = PrevWeight * PrevHistoryToneWeight; tsr_half2 BlendFilteredInput = CurrentWeight * FilteredInputToneWeight; tsr_half2 CommonWeight = SafeRcp(BlendPrevHistory + BlendFilteredInput); FinalHighFrequencyColor = ( dpv_scale(BlendedPrevHighFrequencyColor, CommonWeight * BlendPrevHistory) + dpv_scale(FilteredInputColor, CommonWeight * BlendFilteredInput)); // Quantize validity for the 8bit encoding to avoid numerical shift between color and validity. 
FinalHistoryValidity = ceil(tsr_half(255.0) * OutputValidity) * rcp(tsr_half(255.0)); } PLATFORM_SPECIFIC_ISOLATE { #if CONFIG_SCENE_COLOR_ALPHA const tsr_halfC ColorNull = tsr_half(0.0).xxxx; const tsr_halfC ColorMax10BitsFloat = tsr_half(Max10BitsFloat).xxxx; #else const tsr_halfC ColorNull = tsr_half(0.0).xxx; const tsr_halfC ColorMax10BitsFloat = tsr_half(Max10BitsFloat).xxx; #endif uint LocalGroupThreadIndex = GetGroupThreadIndex(GroupThreadIndex, GroupWaveIndex); #if 1 tsr_short2x2 LocalHistoryPixelPos = dpv_add( tsr_short2(GroupId) * tsr_short2(TILE_SIZE, TILE_SIZE), tsr_short2x2(Map8x8Tile2x2LaneDPV(LocalGroupThreadIndex))); #else tsr_short2x2 LocalHistoryPixelPos = HistoryPixelPos; #endif LocalHistoryPixelPos = InvalidateOutputPixelPos(LocalHistoryPixelPos, HistoryInfo_ViewportMax); #if CONFIG_METADATA_CHANNELS == 1 tsr_halfMx2 FinalMetadata = FinalHistoryValidity; #else tsr_halfMx2 FinalMetadata; FinalMetadata[0] = FinalHistoryValidity; #endif // Stochastically round up or down using the hardware RWTexture2D truncation unit to take into precision // loss due to pixel format encoding. #if CONFIG_ENABLE_STOCASTIC_QUANTIZATION { uint2 Random = Rand3DPCG16(int3(dpv_lo(LocalHistoryPixelPos), View.StateFrameIndexMod8)).xy; tsr_half E = tsr_half(Hammersley16(0, 1, Random).x); FinalHighFrequencyColor = QuantizeForFloatRenderTarget(FinalHighFrequencyColor, E, HistoryQuantizationError); } #endif // Protect from NaN and +Inf when writing out the history. { FinalHighFrequencyColor = -dpv_min(-FinalHighFrequencyColor, ColorNull); FinalHighFrequencyColor = dpv_min(FinalHighFrequencyColor, ColorMax10BitsFloat); } // Ensure that alpha values that are expected to be opaque (but are only close to opaque) are forced to be opaque. 
// (0.995 chosen to accommodate handling of 254/255) #if CONFIG_SCENE_COLOR_ALPHA { FinalHighFrequencyColor[3] = select(FinalHighFrequencyColor[3] > tsr_half(0.995), tsr_half(1.0), FinalHighFrequencyColor[3]); FinalHighFrequencyColor[3] = select(FinalHighFrequencyColor[3] < tsr_half(0.005), tsr_half(0.0), FinalHighFrequencyColor[3]); } #endif // Output full res history { // Output final history lo pixel. { HistoryColorOutput[tsr_short3(dpv_lo(LocalHistoryPixelPos), HistoryArrayIndices_HighFrequency)] = dpv_lo(FinalHighFrequencyColor); HistoryMetadataOutput[tsr_short3(dpv_lo(LocalHistoryPixelPos), 0)] = dpv_lo(FinalMetadata); } // Output final history hi pixel. #if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION { HistoryColorOutput[tsr_short3(dpv_hi(LocalHistoryPixelPos), HistoryArrayIndices_HighFrequency)] = dpv_hi(FinalHighFrequencyColor); HistoryMetadataOutput[tsr_short3(dpv_hi(LocalHistoryPixelPos), 0)] = dpv_hi(FinalMetadata); } #endif // !CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION } // Output final scene color Mip1 { tsr_halfCx2 HalfResOutput = FinalHighFrequencyColor; tsr_short2x2 HalfResOutputPixelPos = dpv_interleave_mono_registers(tsr_short(-1).xx); BRANCH if (bGenerateOutputMip1 || bGenerateOutputMip2 || bGenerateOutputMip3) { HalfResOutput = DownsampleSceneColor(HalfResOutput, LocalGroupThreadIndex, /* XorButterFly = */ uint2(0x01, 0x02)); BRANCH if (bGenerateOutputMip3) { HalfResOutput = DownsampleSceneColor(HalfResOutput, LocalGroupThreadIndex, /* XorButterFly = */ uint2(0x04, 0x10)); HalfResOutput = DownsampleSceneColor(HalfResOutput, LocalGroupThreadIndex, /* XorButterFly = */ uint2(0x08, 0x20)); HalfResOutputPixelPos[0] = (LocalHistoryPixelPos[0] >> tsr_short(3)) | (((LocalHistoryPixelPos[0] & tsr_short(0x7))) * tsr_short(~0)); HalfResOutputPixelPos[1] = (LocalHistoryPixelPos[1] >> tsr_short(3)) | (((LocalHistoryPixelPos[1] & tsr_short(0x7))) * tsr_short(~0)); } else if (bGenerateOutputMip2) { HalfResOutput = DownsampleSceneColor(HalfResOutput, 
LocalGroupThreadIndex, /* XorButterFly = */ uint2(0x04, 0x10)); HalfResOutputPixelPos[0] = (LocalHistoryPixelPos[0] >> tsr_short(2)) | (((LocalHistoryPixelPos[0] & tsr_short(0x3))) * tsr_short(~0)); HalfResOutputPixelPos[1] = (LocalHistoryPixelPos[1] >> tsr_short(2)) | (((LocalHistoryPixelPos[1] & tsr_short(0x3))) * tsr_short(~0)); } else { HalfResOutputPixelPos[0] = (LocalHistoryPixelPos[0] >> tsr_short(1)) | (((LocalHistoryPixelPos[0] & tsr_short(0x1))) * tsr_short(~0)); HalfResOutputPixelPos[1] = (LocalHistoryPixelPos[1] >> tsr_short(1)) | (((LocalHistoryPixelPos[1] & tsr_short(0x1))) * tsr_short(~0)); } } SceneColorOutputMip1[tsr_short3(dpv_lo(HalfResOutputPixelPos), 0)] = dpv_lo(HalfResOutput); #if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION SceneColorOutputMip1[tsr_short3(dpv_hi(HalfResOutputPixelPos), 0)] = dpv_hi(HalfResOutput); #endif } #if DEBUG_OUTPUT for (uint DebugId = 0; DebugId < DEBUG_ARRAY_SIZE; DebugId++) { DebugOutput[tsr_short3(dpv_lo(LocalHistoryPixelPos), DebugId)] = dpv_lo(Debug[DebugId]); #if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION DebugOutput[tsr_short3(dpv_hi(LocalHistoryPixelPos), DebugId)] = dpv_hi(Debug[DebugId]); #endif } #endif } }