Files
UnrealEngine/Engine/Shaders/Private/VariableRateShading/VRSShadingRateReproject.usf
2025-05-18 13:04:45 +08:00

124 lines
5.4 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#include "VRSShadingRateCommon.ush"
#include "../Common.ush"
#include "../SceneTexturesCommon.ush"
// Shader parameters. The notes below describe how each value is used in this file;
// units and exact meaning are inferred from that usage - TODO confirm against the C++ pass setup.

// Upper bound used when clamping the range of texels read from InputSRITexture.
float2 InputSRIDimensions;
// Threads whose dispatch ID is at or beyond these dimensions are treated as invalid and write nothing.
float2 ScaledSRIDimensions;
// Scales UVs into input texel coordinates, and scales ScaledUVOffset into an output texel offset.
float2 TextureDimensions;
// Multiplied with thread coordinates to turn them into UVs (presumably 1 / TextureDimensions - confirm).
float2 InvTextureDimensions;
// UV-space offset added to the depth lookup UV and, scaled by TextureDimensions, to the output write coordinates.
float2 ScaledUVOffset;
// Factor applied to UVs when mapping them into the texel space of the input SRI.
float InvDynamicResolutionScale;
// Source shading rate image. Each texel packs the regular rate in the low bits and the
// conservative rate shifted up by CONSERVATIVE_SHADING_RATE_SHIFT.
Texture2D<uint> InputSRITexture;
// Output: regular shading rate per texel.
RWTexture2D<uint> ScaledSRITexture;
// Output: conservative shading rate per texel.
RWTexture2D<uint> ScaledConservativeSRITexture;
// Device Z shared by each 2x2 block of threads in a group; one slot per block, written by the
// block's top-left thread in ReprojectBufferUV.
groupshared float CachedDepth[THREADGROUP_SIZE / 2][THREADGROUP_SIZE / 2];
// This is a large approximation of reprojection in a couple of ways in favor of performance:
// 1. We only look at reprojection based on the camera, not velocity vectors, i.e. this will be incorrect for dynamic objects. In most cases
// motion blur will hide this entirely, though I found no perceptible artifacts even with motion blur off.
// 2. The reprojection only does one depth texture fetch in the center of each 16x16 pixel block.
//
// Both of these approximations are fine because VRS doesn't require a whole lot of accuracy since the side-effect is
// just lower resolution in areas that are "wrong" rather than the type of ghosting you'd get from bad reprojection in something
// like TAA. In practice, I found reprojection is only required in a really small set of scenarios since VRS already preserves
// silhouettes anyway. The main artifact I saw was materials that have a strip of metal that's not on the edge of a mesh and can
// cause noticeable aliasing when it's got a bright specular highlight.
/**
 * Reprojects a buffer UV from the current frame into the previous frame using only the camera
 * transform (View.ClipToPrevClip) and a depth value shared by each 2x2 block of threads.
 *
 * Every thread of the group must call this function, valid or not, because it contains a
 * group-wide barrier.
 *
 * @param DispatchThreadId	Global thread ID; used to build the UV of the shared depth tap.
 * @param GroupThreadId		Thread ID within the group; selects the 2x2 block and its cache slot.
 * @param BufferUV			UV to reproject.
 * @param bValidThread		False for threads outside the dispatch's useful area; such threads
 *							neither read depth nor reproject.
 * @return The reprojected UV for valid threads, otherwise BufferUV unchanged.
 */
float2 ReprojectBufferUV(uint3 DispatchThreadId, uint3 GroupThreadId, float2 BufferUV, bool bValidThread)
{
	// Each 2x2 quad of threads shares a single slot in the groupshared depth cache.
	const uint2 QuadSlot = uint2(GroupThreadId.x / 2, GroupThreadId.y / 2);
	const bool bQuadLeader = (GroupThreadId.x % 2 == 0) && (GroupThreadId.y % 2 == 0);

	// Only the top-left thread of each quad taps the depth buffer. Per the original author, this
	// shader is primarily bound by depth reads on Scarlett, and sharing one tap across the quad
	// made it almost 4x faster at 4k.
	if (bQuadLeader && bValidThread)
	{
		// One texel further than the leader's own corner lands on the middle of the 2x2 quad.
		const float2 QuadCenterTexel = float2(DispatchThreadId.xy) + float2(1, 1);
		const float2 QuadCenterUV = ScaledUVOffset + QuadCenterTexel * InvTextureDimensions;
		CachedDepth[QuadSlot.x][QuadSlot.y] = LookupDeviceZ(QuadCenterUV);
	}

	// Make the leader's write visible to the other threads of the quad.
	GroupMemoryBarrierWithGroupSync();

	if (!bValidThread)
	{
		return BufferUV;
	}

	// Build a current-frame clip position from the UV and the shared depth, move it to the
	// previous frame's clip space, then convert back to a buffer UV.
	const float SharedDeviceZ = CachedDepth[QuadSlot.x][QuadSlot.y];
	const float2 CurrentScreenPos = ViewportUVToScreenPos(BufferUVToViewportUV(BufferUV));
	const float4 PrevClip = mul(float4(CurrentScreenPos, SharedDeviceZ, 1), View.ClipToPrevClip);
	const float2 PrevScreenPos = PrevClip.xy / PrevClip.w;
	return ViewportUVToBufferUV(ScreenPosToViewportUV(PrevScreenPos));
}
/**
 * Compute entry point: for every texel of the scaled shading rate image, reprojects the texel
 * center into the previous frame, gathers the shading rates of all input SRI texels covered by
 * the reprojected footprint, and writes the combined regular and conservative rates to
 * ScaledSRITexture and ScaledConservativeSRITexture.
 *
 * Rates are combined with a bitwise AND, so a footprint only keeps a coarse rate bit if every
 * covered input texel has it. Footprints that reproject entirely off screen get 1x1.
 *
 * @param DispatchThreadId	Global thread ID; xy is the output texel (before ScaledUVOffset is applied).
 * @param GroupThreadId		Thread ID within the group; forwarded to ReprojectBufferUV.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void RescaleVariableRateShading(
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const bool bValidThread = DispatchThreadId.x < ScaledSRIDimensions.x && DispatchThreadId.y < ScaledSRIDimensions.y;

	// Seed both the regular and the conservative bit-fields with 2x2 so the AND-accumulation
	// below can only keep or remove bits (assumes 2x2 has every rate bit set - see VRSShadingRateCommon.ush).
	uint shadingRate = D3D12_SHADING_RATE_2X2 | (D3D12_SHADING_RATE_2X2 << CONSERVATIVE_SHADING_RATE_SHIFT);

	const float2 pixelUVSize = InvTextureDimensions;
	const float epsilon = 0.0001;
	const float2 HalfPixelOffset = float2(0.5, 0.5);

	float2 CenterUV = (float2(DispatchThreadId.xy) + HalfPixelOffset) * pixelUVSize;
	CenterUV = ReprojectBufferUV(DispatchThreadId, GroupThreadId, CenterUV, bValidThread);

	// FXC doesn't allow a thread to terminate before GroupMemoryBarrierWithGroupSync so
	// this return isn't allowed until right after ReprojectBufferUV
	if (!bValidThread)
	{
		return;
	}

	// Map from pre-scaled UV to post-scaled UV. The epsilon pulls both corners slightly inwards
	// so a footprint that sits exactly on texel boundaries does not spill into its neighbours.
	const float2 uvStart = CenterUV - pixelUVSize * 0.5 + epsilon;
	const float2 uvEnd = CenterUV + pixelUVSize * 0.5 - epsilon;

	// Convert from UV space to Texel space of the source (pre-scaled) image
	int2 inputCoordStart = int2(uvStart * InvDynamicResolutionScale * TextureDimensions.xy);
	int2 inputCoordEnd = int2(uvEnd * InvDynamicResolutionScale * TextureDimensions.xy);

	// Clamp start to top left corner of the input SRI.
	inputCoordStart = max(inputCoordStart, int2(0, 0));

	// Clamp end to the bottom right texel of the input SRI. The loops below are inclusive, so the
	// last texel that may be read is InputSRIDimensions - 1; clamping to InputSRIDimensions itself
	// would read one texel out of bounds (which returns 0 on D3D and forces the result to 1x1).
	// Assumes InputSRIDimensions is the size of InputSRITexture in texels.
	inputCoordEnd = min(inputCoordEnd, int2(InputSRIDimensions) - 1);

	// start.x or y will be greater than end.x or y iff both corners were out of view on the same side.
	// start is never clamped against the bottom-right and end is never clamped against the top-left
	// So the only way start can be greater than end, is if both corners were out of view and only
	// one of them got clamped past the other.
	const bool bOnScreen = all(inputCoordStart <= inputCoordEnd);
	if (bOnScreen)
	{
		// Both bounds are non-negative here: start was clamped to 0 and end >= start.
		for (uint x = inputCoordStart.x; x <= inputCoordEnd.x; x++)
		{
			for (uint y = inputCoordStart.y; y <= inputCoordEnd.y; y++)
			{
				shadingRate &= InputSRITexture[uint2(x, y)];
			}
		}
	}
	else
	{
		// Don't apply VRS to anything coming offscreen
		shadingRate = D3D12_SHADING_RATE_1X1 | (D3D12_SHADING_RATE_1X1 << CONSERVATIVE_SHADING_RATE_SHIFT);
	}

	// Both outputs are written at the same texel: the thread coordinate shifted by the UV offset
	// expressed in texels (truncated to an integer coordinate).
	const uint2 OutputCoord = uint2(float2(DispatchThreadId.xy) + ScaledUVOffset * TextureDimensions);

	// Split the packed value back into its regular and conservative parts.
	ScaledSRITexture[OutputCoord] = shadingRate & D3D12_SHADING_RATE_2X2;
	ScaledConservativeSRITexture[OutputCoord] = (shadingRate >> CONSERVATIVE_SHADING_RATE_SHIFT) & D3D12_SHADING_RATE_2X2;
}