// Copyright Epic Games, Inc. All Rights Reserved.

#include "VRSShadingRateCommon.ush"
#include "../Common.ush"
#include "../SceneTexturesCommon.ush"

// Size of the input (pre-scaled) shading rate image; used below to clamp texel coordinates.
float2 InputSRIDimensions;
// Size of the output (scaled) shading rate image; threads at or beyond it do no work.
float2 ScaledSRIDimensions;
float2 TextureDimensions;
float2 InvTextureDimensions;
// UV-space offset applied to the depth lookup and (scaled by TextureDimensions) to the output texel.
float2 ScaledUVOffset;
float InvDynamicResolutionScale;

// The element type has to be an unsigned integer: values read from the input are AND-ed
// into a uint shading rate, and uint values are stored to both outputs.
Texture2D<uint> InputSRITexture;
RWTexture2D<uint> ScaledSRITexture;
RWTexture2D<uint> ScaledConservativeSRITexture;

// One cached depth value per 2x2 block of threads in the group.
groupshared float CachedDepth[THREADGROUP_SIZE / 2][THREADGROUP_SIZE / 2];

// This is a large approximation of reprojection in a couple of ways in favor of performance:
// 1. We only look at reprojection based on the camera, not velocity vectors. I.E. this will be incorrect for dynamic objects. In most cases
//    motion blur will hide this entirely, though I found no perceptible artifacts even with motion blur off
// 2. The reprojection only does one depth texture fetch in the center of each 16x16 pixel block.
//
// Both of these approximations are fine because VRS doesn't require a whole lot of accuracy since the side-effect is
// just lower resolution in areas that are "wrong" rather than the type of ghosting you'd get from bad reprojection in something
// like TAA. In practice, I found reprojection is only required in a really small set of scenarios since VRS already preserves
// silhouettes anyways. The main artifact I saw was materials that have a strip of metal that's not on the edge of a mesh and can
// cause noticeable aliasing when it's got a bright specular highlight.
//
// DispatchThreadId / GroupThreadId: the calling thread's IDs.
// BufferUV: UV to reproject; returned unchanged when bValidThread is false.
// bValidThread: false for threads outside the output image. Those threads still have to call this
//               function because it contains a group-wide barrier.
// Returns the buffer UV obtained by transforming the current clip position with View.ClipToPrevClip.
float2 ReprojectBufferUV(uint3 DispatchThreadId, uint3 GroupThreadId, float2 BufferUV, bool bValidThread)
{
	// Only read depth once in the top left thread for each 2x2 block of thread IDs
	// This shader is primarily bound on reading from the depth buffer on Scarlett. At 4k,
	// doing a single depth buffer tap that gets shared by each 2x2 block of threads ends
	// up making the shader almost 4x faster
	const uint2 CachedDepthIndex = GroupThreadId.xy / 2;
	const bool bTopLeftOfBlock = all((GroupThreadId.xy % 2) == 0);

	if (bTopLeftOfBlock && bValidThread)
	{
		// Center the UV in the middle of the 2x2 block
		const float2 DepthUV = ScaledUVOffset + (float2(DispatchThreadId.xy) + float2(1, 1)) * InvTextureDimensions;
		CachedDepth[CachedDepthIndex.x][CachedDepthIndex.y] = LookupDeviceZ(DepthUV);
	}

	// Make the cached depth visible to the other three threads of each 2x2 block.
	GroupMemoryBarrierWithGroupSync();

	if (bValidThread)
	{
		const float DeviceZ = CachedDepth[CachedDepthIndex.x][CachedDepthIndex.y];

		// Current buffer UV -> current clip space -> previous clip space -> previous buffer UV.
		const float2 ScreenPosition = ViewportUVToScreenPos(BufferUVToViewportUV(BufferUV));
		const float4 ThisClip = float4(ScreenPosition, DeviceZ, 1);
		const float4 PrevClip = mul(ThisClip, View.ClipToPrevClip);
		const float2 PrevScreen = PrevClip.xy / PrevClip.w;

		BufferUV = ViewportUVToBufferUV(ScreenPosToViewportUV(PrevScreen));
	}

	return BufferUV;
}

// Produces one texel of the scaled shading rate image (and of its conservative counterpart) per
// thread by reprojecting the texel's footprint into the input shading rate image and AND-ing
// together every input texel the footprint touches.
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void RescaleVariableRateShading(
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const bool bValidThread = all(float2(DispatchThreadId.xy) < ScaledSRIDimensions);

	// Start from 2x2 in both the regular and the conservative bit fields; the gather below can
	// only clear bits.
	uint shadingRate = D3D12_SHADING_RATE_2X2 | (D3D12_SHADING_RATE_2X2 << CONSERVATIVE_SHADING_RATE_SHIFT);

	const float2 pixelUVSize = InvTextureDimensions;
	const float epsilon = 0.0001;
	const float2 HalfPixelOffset = float2(0.5, 0.5);

	float2 CenterUV = (float2(DispatchThreadId.xy) + HalfPixelOffset) * pixelUVSize;
	CenterUV = ReprojectBufferUV(DispatchThreadId, GroupThreadId, CenterUV, bValidThread);

	// FXC doesn't allow a thread to terminate before GroupMemoryBarrierWithGroupSync so
	// this return isn't allowed until right after ReprojectBufferUV
	if (!bValidThread)
	{
		return;
	}

	// Map from pre-scaled UV to post-scaled UV. The epsilon pulls both corners slightly inwards.
	const float2 uvStart = CenterUV - pixelUVSize * 0.5 + epsilon;
	const float2 uvEnd = CenterUV + pixelUVSize * 0.5 - epsilon;

	// Convert from UV space to Texel space of the source (pre-scaled) image
	int2 inputCoordStart = uvStart * InvDynamicResolutionScale * TextureDimensions.xy;
	int2 inputCoordEnd = uvEnd * InvDynamicResolutionScale * TextureDimensions.xy;

	// Clamp start to top left corner of the input SRI.
	inputCoordStart = max(inputCoordStart, int2(0, 0));

	// Clamp end to bottom right corner of the input SRI. The loops below include the end
	// coordinate, so the largest usable index is dimension - 1; clamping to the dimension itself
	// would read one texel outside the image.
	inputCoordEnd = min(inputCoordEnd, int2(InputSRIDimensions) - int2(1, 1));

	// start.x or y will be greater than end.x or y iff both corners were out of view on the same side.
	// start is never clamped against the bottom-right and end is never clamped against the top-left
	// So the only way start can be greater than end, is if both corners were out of view and only
	// one of them got clamped past the other.
	const bool bOnScreen = all(inputCoordStart <= inputCoordEnd);

	if (bOnScreen)
	{
		// Both corners are non-negative here (start was clamped to 0 and end >= start).
		const uint2 CoordBegin = uint2(inputCoordStart);
		const uint2 CoordEnd = uint2(inputCoordEnd);

		for (uint x = CoordBegin.x; x <= CoordEnd.x; x++)
		{
			for (uint y = CoordBegin.y; y <= CoordEnd.y; y++)
			{
				shadingRate &= InputSRITexture[uint2(x, y)];
			}
		}
	}
	else
	{
		// Don't apply VRS to anything coming offscreen
		shadingRate = D3D12_SHADING_RATE_1X1 | (D3D12_SHADING_RATE_1X1 << CONSERVATIVE_SHADING_RATE_SHIFT);
	}

	// Destination texel, shared by both outputs. The float sum is truncated to integer texel
	// coordinates.
	const uint2 OutputCoord = uint2(float2(DispatchThreadId.xy) + ScaledUVOffset * TextureDimensions);

	ScaledSRITexture[OutputCoord] = shadingRate & D3D12_SHADING_RATE_2X2;
	ScaledConservativeSRITexture[OutputCoord] = (shadingRate >> CONSERVATIVE_SHADING_RATE_SHIFT) & D3D12_SHADING_RATE_2X2;
}